Module:User:Theknightwho/etymology

This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.


-- Table that collects this module's public functions; it is returned at the end of the file.
local export = {}

-- For testing. The flag is forwarded as the last argument of the format_categories() call
-- made in export.format_etyl().
-- NOTE(review): presumably a true value forces categories to be emitted on pages where they
-- would normally be suppressed -- confirm against Module:utilities.
local force_cat = false

-- Normalise `terminfo` when its source (`terminfo.lang`) is a language family.
-- For a family the term is always replaced by "-"; if a real term had been supplied, that
-- is reported through the tracking module first. Any other kind of source is returned
-- untouched. `terminfo` is modified in place and is also the return value.
local function term_error(terminfo)
	-- Guard: only families receive special treatment.
	if not terminfo.lang:hasType("family") then
		return terminfo
	end

	local supplied = terminfo.term
	if supplied and supplied ~= "-" then
		-- A term was given together with a family; record that before discarding it.
		require("Module:debug/track")("etymology/family/has-term")
	end

	terminfo.term = "-"
	return terminfo
end


-- Build the link text that follows the source-language name.
-- Returns a space followed by the output of Module:links' full_link() for `terminfo`, or
-- the empty string when the term is "-" (in which case "<template_name>/no-term" is
-- reported through the tracking module instead).
local function create_link(terminfo, template_name)
	if terminfo.term ~= "-" then
		local formatted = require("Module:links").full_link(terminfo, "term", true)
		return " " .. formatted
	end

	--[=[
	[[Special:WhatLinksHere/Wiktionary:Tracking/cognate/no-term]]
	[[Special:WhatLinksHere/Wiktionary:Tracking/derived/no-term]]
	[[Special:WhatLinksHere/Wiktionary:Tracking/inherited/no-term]]
	[[Special:WhatLinksHere/Wiktionary:Tracking/borrowed/no-term]]
	[[Special:WhatLinksHere/Wiktionary:Tracking/calque/no-term]]
	]=]
	require("Module:debug/track")(template_name .. "/no-term")
	return ""
end


-- Normalise `terminfo` with term_error() and then turn it into link text with create_link().
-- `template_name` is only used to build the tracking key and defaults to "derived".
function export.process_and_create_link(terminfo, template_name)
	local normalised = term_error(terminfo)
	local tracking_name = template_name or "derived"
	return create_link(normalised, tracking_name)
end
	

-- Work out how a source language is shown in running text and how it is named in categories.
-- Returns two values: the display text and the category-name fragment.
-- The codes "und", "mul" and "mul-tax" have fixed texts. For every other source the display
-- is its canonical name when `raw` is set, otherwise the result of makeWikipediaLink(); the
-- category fragment is the result of getDisplayForm().
function export.get_display_and_cat_name(source, raw)
	local code = source:getCode()

	if code == "und" then
		return "undetermined", "other languages"
	end
	if code == "mul" then
		return "[[w:Translingualism|translingual]]", "Translingual"
	end
	if code == "mul-tax" then
		return "[[w:taxonomic name|taxonomic name]]", "taxonomic names"
	end

	local display = raw and source:getCanonicalName() or source:makeWikipediaLink()
	local cat_name = source:getDisplayForm()
	return display, cat_name
end


-- Fetch the display text for `source` and, when appropriate, append the matching derivation
-- category to `categories`.
-- A category is only added when there is a current language (`lang`) and `nocat` is not set;
-- in that case a missing `categories` list is created. If `lang` and `source` have the same
-- code the category is "<lang> twice-borrowed terms", otherwise
-- "<lang> terms derived from <source category name>".
-- Returns the display text and the (possibly newly created, possibly still nil) list.
function export.insert_source_cat_get_display(categories, lang, source, raw, nocat)
	local display, cat_name = export.get_display_and_cat_name(source, raw)

	-- Nothing to categorise: hand the list back exactly as it was received.
	if nocat or not lang then
		return display, categories
	end

	categories = categories or {}

	local suffix
	if lang:getCode() == source:getCode() then
		suffix = " twice-borrowed terms"
	else
		suffix = " terms derived from " .. cat_name
	end
	table.insert(categories, lang:getCanonicalName() .. suffix)

	return display, categories
end


-- FIXME: rename to format_source()
-- Format the source-language part of an etymology template.
-- Returns a <span class="etyl"> element containing the display text of `source`, followed by
-- the formatted categories. Categories are produced only when there is a current language
-- (`lang`) and `nocat` is not set; `categories` may carry entries supplied by the caller, to
-- which the derivation category is added.
function export.format_etyl(lang, source, sort_key, categories, nocat)
	if sort_key then
		-- [[Special:WhatLinksHere/Wiktionary:Tracking/etymology/sortkey]]
		require("Module:debug/track")("etymology/sortkey")
	end

	local display
	display, categories = export.insert_source_cat_get_display(categories, lang, source, false, nocat)

	-- Without a current language nothing is categorised; {{cog}} currently gets no categories.
	local category_text = ""
	if lang and not nocat then
		local utilities = require("Module:utilities")
		category_text = utilities.format_categories(categories, lang, sort_key, nil, force_cat)
	end

	return '<span class="etyl">' .. display .. category_text .. "</span>"
end


-- Internal implementation of the {{cognate|...}} template.
-- Delegates to export.format_derived() without a current language and without a `nocat`
-- value, using "cognate" as the template name for tracking.
function export.format_cognate(terminfo, sort_key)
	local lang, nocat = nil, nil
	return export.format_derived(lang, terminfo, sort_key, nocat, "cognate")
end


-- Internal implementation of the {{derived|...}} template.
-- Returns the formatted source language (taken from `terminfo.lang`) followed by the link to
-- the term. `template_name` is passed on for tracking purposes only.
function export.format_derived(lang, terminfo, sort_key, nocat, template_name)
	local source_text = export.format_etyl(lang, terminfo.lang, sort_key, nil, nocat)
	local link = export.process_and_create_link(terminfo, template_name)
	return source_text .. link
end


-- Report whether `otherlang` (which may be an etymology-only language) counts as an ancestor
-- of `lang`. Returns true or false.
--
-- Step 1: walk upwards from `otherlang` through its chain of parents and succeed as soon as
-- `lang` has one of them as an ancestor. With `lang` = Italian this accepts Latin directly,
-- and also Vulgar Latin (an etymology-only language whose parent is Latin). The walk only
-- goes up, never down: with `lang` = Tajik and `otherlang` = Persian there is no match, even
-- though Classical Persian (an etymology-only variety of Persian) is the ancestor of Tajik.
-- The effect is that Tajik can only inherit from Classical Persian or its children, whereas
-- a language whose ancestor is Persian itself could inherit from every variety of Persian.
--
-- Step 2: the Italian -> Old Italian case. Old Italian is an etymology-only language that is
-- marked as "ancestral to parent" and sits under Italian; step 1 misses it because Italian
-- is not an ancestor of Italian.
-- NOTE(review): the comparison below uses getFull(), not getParent(); presumably the two
-- coincide for the intended case -- confirm against the languages module.
local function has_ancestor(lang, otherlang)
	-- Step 1: climb the parent chain of `otherlang`.
	local candidate = otherlang
	while candidate do
		if lang:hasAncestor(candidate) then
			return true
		end
		candidate = candidate:getParent()
	end

	-- Step 2: only etymology-only languages marked as ancestral to their parent qualify.
	if not otherlang:hasType("etymology-only") or not otherlang:isAncestralToParent() then
		return false
	end

	local full = otherlang:getFull()
	if not full then
		return false
	end
	return full:getCode() == lang:getCode()
end


-- Verify that `otherlang` (which may be an etymology-only language) is an ancestor of `lang`
-- according to has_ancestor(), and raise an error describing the mismatch if it is not.
-- No error is raised when the current page's namespace text is "Template".
-- The message names both languages, links to the data module of `lang`, and lists the
-- ancestors that `lang` does have (or states that it has none).
local function check_ancestor(lang, otherlang)
	if has_ancestor(lang, otherlang) or mw.title.getCurrentTitle().nsText == "Template" then
		return
	end

	-- Describe a language as "category link (code)"; for an etymology-only language the
	-- description of its parent is appended recursively.
	local function describe(language)
		local text = ("%s (%s)"):format(language:makeCategoryLink(), language:getCode())
		if language:hasType("etymology-only") then
			local parent_text = describe(language:getParent())
			text = text .. (" (an etymology-only language whose regular parent is %s)"):format(parent_text)
		end
		return text
	end

	local ancestors = lang:getAncestors()
	local data_module = require("Module:User:Theknightwho/languages").getDataModuleName(lang:getCode())
	local module_link = "[[Module:" .. data_module .. "]]"

	local postscript
	if ancestors[1] then
		-- A second entry means the sentence has to be phrased in the plural.
		local several = ancestors[2]
		local listed = table.concat(require("Module:fun").map(describe, ancestors), " and ")
		postscript = ("The ancestor%s of %s %s %s."):format(
			several and "s" or "",
			lang:getCanonicalName(),
			several and "are" or "is",
			listed)
	else
		postscript = describe(lang) .. " has no ancestors."
	end

	error(("%s is not set as an ancestor of %s in %s. %s"):format(
		describe(otherlang), describe(lang), module_link, postscript))
end


-- Internal implementation of the {{inherited|...}} template.
-- Adds the category "<lang> terms inherited from <source>" unless `nocat` is set, and raises
-- an error (via check_ancestor) when the source is not an ancestor of `lang`.
-- Returns the formatted source language followed by the link to the term.
function export.format_inherited(lang, terminfo, sort_key, nocat)
	local source = terminfo.lang

	local categories = {}
	if not nocat then
		local inherited_cat = lang:getCanonicalName() .. " terms inherited from " .. source:getCanonicalName()
		table.insert(categories, inherited_cat)
	end

	-- The link is built before the ancestry check, so process_and_create_link() has already
	-- run by the time check_ancestor() may raise.
	local link = export.process_and_create_link(terminfo, "inherited")
	check_ancestor(lang, source)

	local source_text = export.format_etyl(lang, source, sort_key, categories, nocat)
	return source_text .. link
end


-- Append the borrowing category for `lang` and `source` to `categories`.
-- If both have the same code the category is "<lang> twice-borrowed terms"; otherwise it is
-- "<lang> terms borrowed from <display form of source>". Nothing is returned.
function export.insert_borrowed_cat(categories, lang, source)
	local same_language = lang:getCode() == source:getCode()
	local suffix = same_language
		and " twice-borrowed terms"
		or (" terms borrowed from " .. source:getDisplayForm())
	table.insert(categories, lang:getCanonicalName() .. suffix)
end


-- Internal implementation of the {{borrowed|...}} template.
-- Adds the borrowing category unless `nocat` is set, then returns the formatted source
-- language (taken from `terminfo.lang`) followed by the link to the term.
function export.format_borrowed(lang, terminfo, sort_key, nocat)
	local source = terminfo.lang

	local categories = {}
	if not nocat then
		export.insert_borrowed_cat(categories, lang, source)
	end

	local source_text = export.format_etyl(lang, source, sort_key, categories, nocat)
	local link = export.process_and_create_link(terminfo, "borrowed")
	return source_text .. link
end

-- Hand the table of public functions to whoever require()s this module.
return export