Module:category tree/poscatboiler/data/lang-specific/uk


This module handles generating the descriptions and categorization for Ukrainian category pages of the format "Ukrainian LABEL" where LABEL can be any text. Analogous examples from other languages are Category:Bulgarian conjugation 2.1 verbs and Category:Russian velar-stem neuter-form nouns. This module is part of the poscatboiler system, which is a general framework for generating the descriptions and categorization of category pages.

For more information, see Module:category tree/poscatboiler/data/lang-specific/documentation.

NOTE: If you add a new language-specific module, you must add the language code to the list at the top of Module:category tree/poscatboiler/data/lang-specific in order for the module to be recognized.


-- Fixed category definitions, keyed by label (the category name without the
-- leading "Ukrainian "). Exported as LABELS at the end of the module.
local labels = {}
-- Functions that recognize pattern-shaped labels. Each one reads data.label
-- and returns a category spec table, or nothing if the label is not its own.
-- Exported as HANDLERS at the end of the module.
local handlers = {}

-- Short aliases for the mw.ustring (Scribunto UTF-8 string library) functions
-- used throughout this module.
local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local rsubn = mw.ustring.gsub

-- Same substitution as rsubn(), but only the resulting string is returned;
-- the replacement count is dropped (the parentheses truncate to one value).
local function rsub(term, foo, bar)
	return (rsubn(term, foo, bar))
end


--------------------------------- Verbs --------------------------------

-- Root categories for verbs: "verbs by class" and
-- "verbs by class and accent pattern". Both hang off
-- "verbs by inflection type" and differ only in the grouping criterion,
-- which doubles as the sort key.
for _, criterion in ipairs({"class", "class and accent pattern"}) do
	labels["verbs by " .. criterion] = {
		description = "Ukrainian verbs categorized by " .. criterion .. ".",
		parents = {{name = "verbs by inflection type", sort = criterion}},
	}
end

-- Handler for per-class verb categories.
--
-- Recognized labels:
--   "class N verbs"           (optionally followed by a variant code)
--   "class N<variant>P verbs" where P is the accent pattern a, b or c
-- N is one or more digits; the variant code is any run of the characters
-- ( ) [ ] °.
--
-- Reads only data.label. Returns a category spec table (description,
-- breadcrumb, parents), or nothing when the label has a different shape.
table.insert(handlers, function(data)
	-- At least one digit is required so that a label without a class number
	-- does not produce a category with an empty class.
	local cls, variant, pattern = rmatch(data.label, "^class ([0-9]+)([()%[%]°]*)([abc]?) verbs$")
	if not cls then
		return
	end

	if pattern == "" then
		-- Class-level category: no accent pattern given.
		return {
			description = "Ukrainian class " .. cls .. " verbs.",
			breadcrumb = cls,
			parents = {{name = "verbs by class", sort = cls .. variant}},
		}
	end

	-- Sentence describing where the stress falls for this accent pattern.
	local stress_text =
		pattern == "a" and "With this pattern, all forms are stem-stressed."
		or pattern == "b" and "With this pattern, all forms are ending-stressed."
		or "With this pattern, the first singular present indicative and all forms " ..
		"outside of the present indicative are ending-stressed, while the remaining " ..
		"forms of the present indicative are stem-stressed."

	-- Optional sentence explaining the variant code. It must always be a
	-- string: an unrecognized class/variant combination gets no explanation
	-- instead of making the concatenation below fail on a boolean.
	local variant_text = ""
	if cls == "3" and variant == "°" then
		variant_text = " The variant code indicates that the -н of the stem " ..
			"is missing in most non-present-tense forms."
	elseif cls == "3" and (variant == "(°)" or variant == "[°]") then
		variant_text = " The variant code indicates that the -н of the stem " ..
			"is optionally missing in most non-present-tense forms."
	elseif cls == "6" and variant == "°" then
		variant_text = " The variant code indicates that the present tense is not " ..
			"[[Appendix:Glossary#iotation|iotated]]. (In most verbs of this class, " ..
			"the present tense is iotated, e.g. писа́ти with present tense " ..
			"пишу́, пи́шеш, пи́ше, etc.)"
	end

	return {
		description = "Ukrainian class " .. cls .. " verbs of " ..
			"accent pattern " .. pattern ..
			(variant == "" and "" or " and variant " .. variant) .. ". " ..
			stress_text .. variant_text,
		breadcrumb = cls .. variant .. pattern,
		parents = {
			{name = "class " .. cls .. " verbs", sort = pattern},
			{name = "verbs by class and accent pattern", sort = cls .. pattern},
		},
	}
end)


--------------------------------- Adjectives --------------------------------

-- Root category for the adjective stem-type/stress categories produced by the
-- adjective handler further down.
do
	local description = "Ukrainian adjectives categorized by stem type and stress. Unlike for nouns, adjectives are consistently either stem-stressed or ending-stressed."
	local parent = {name = "adjectives by inflection type", sort = "stem type and stress"}
	labels["adjectives by stem type and stress"] = {
		description = description,
		parents = {parent},
	}
end


-- Maps an adjective stem type (as it appears in the category label) to the
-- text that completes the sentence "The stem ends in ...". A stem type with no
-- entry here is not recognized by the adjective handler.
local adj_stem_expl = {
	["ц-stem"] = "-ц",
	["vowel-stem"] = "a vowel, or -й or -ь",
	["soft-stem"] = "a soft consonant",
	["hard-stem"] = "a hard consonant",
	["possessive"] = "-ов, -єв, -ин or -їн",
}

-- Nominative endings for each adjectival declension type, in the order
-- {masculine singular, feminine singular, neuter singular, plural}.
-- The keys are the internal ending-type names: the adjective handler reaches
-- them via stem_to_ending_type(), while the adjectival-noun handler indexes
-- this table directly by stem name.
-- NOTE(review): the "(nil)" string under "surname" is emitted verbatim if a
-- neuter surname category is ever requested; presumably it marks a missing
-- neuter form -- confirm.
local adj_decl_endings = {
	["hard stem-stressed"] = {"-ий", "-а", "-е", "-і"},
	["hard ending-stressed"] = {"-и́й", "-а́", "-е́", "-і́"},
	["soft"] = {"-ій", "-я", "-є", "-і"},
	["c-stem"] = {"-ий", "-я", "-е", "-і"},
	["j-stem"] = {"-їй", "-я", "-є", "-ї"},
	["possessive"] = {"-", "-а", "-е", "-і"},
	["surname"] = {"-", "-а", "(nil)", "-и"},
}

-- Handler for adjective categories by stem type and (for hard stems) stress.
--
-- Recognized labels:
--   "STEM STRESS-stressed adjectives"  e.g. "hard-stem ending-stressed adjectives"
--   "STEM adjectives"                  e.g. "soft-stem adjectives"
-- STEM must be a key of adj_stem_expl and the STEM/STRESS combination must
-- map to an entry of adj_decl_endings; otherwise nothing is returned.
--
-- Reads only data.label. Returns a category spec table or nothing.
table.insert(handlers, function(data)
	-- FIXME! Harmonize stem types here with stem types in 'adj_decl_endings' (used
	-- directly in adjectival nouns).
	-- Translate the label's stem/stress pair into a key of adj_decl_endings.
	-- Only hard stems carry an explicit stress; every other stem type is
	-- accepted solely without one. Returns nil for unknown combinations.
	local function stem_to_ending_type(stem, stress)
		if stem == "hard-stem" and stress == "stem" then
			return "hard stem-stressed"
		elseif stem == "hard-stem" and stress == "ending" then
			return "hard ending-stressed"
		elseif stress == "" then
			if stem == "soft-stem" then
				return "soft"
			elseif stem == "ц-stem" then
				return "c-stem"
			elseif stem == "vowel-stem" then
				return "j-stem"
			elseif stem == "possessive" then
				return "possessive"
			end
		end
		return nil
	end

	-- Both patterns are anchored at the end, like the other handlers in this
	-- module, so that labels with trailing text are not claimed here. The
	-- hyphen is escaped because '-' is a pattern quantifier.
	local breadcrumb, stem, stress = rmatch(data.label, "^(([^ ]*) ([^ *]*)%-stressed) adjectives$")
	if not breadcrumb then
		breadcrumb, stem = rmatch(data.label, "^(([^ ]*)) adjectives$")
		stress = ""
	end
	if breadcrumb then
		local ending_type = stem_to_ending_type(stem, stress)
		if ending_type and adj_stem_expl[stem] then
			-- masculine, feminine, neuter singular and plural nominative endings
			local m, f, n, pl = unpack(adj_decl_endings[ending_type])
			local stresstext = stress == "stem" and
				"The adjectives in this category have stress on the stem." or
				stress == "ending" and
				"The adjectives in this category have stress on the endings." or
				"All adjectives of this type have stress on the stem."
			-- The neuter singular slot uses the neuter ending n (not the
			-- plural ending).
			local endingtext = "ending in the nominative in masculine singular " .. m ..
				", feminine singular " .. f .. ", neuter singular " .. n .. " and plural " ..
				pl .. "."
			local stemtext = " The stem ends in " .. adj_stem_expl[stem] .. "."
			return {
				description = "Ukrainian " .. stem .. " adjectives, " .. endingtext .. stemtext .. " " .. stresstext,
				breadcrumb = breadcrumb,
				parents = {"adjectives by stem type and stress"},
			}
		end
	end
end)


--------------------------------- Nouns/Pronouns/Numerals --------------------------------

-- Register the fixed labels shared by nouns, pronouns and numerals. For each
-- part of speech the entries below become labels named "<pos> <label>" whose
-- description is "Ukrainian <pos> <description>", whose breadcrumb is the bare
-- label and which have a single parent {name = parent, sort = sort}.
for _, pos in ipairs({"nouns", "pronouns", "numerals"}) do
	-- Singular form of the part of speech, e.g. "noun".
	local sgpos = (pos:gsub("s$", ""))
	local by_inflection_type = pos .. " by inflection type"
	local irregular = "irregular " .. pos

	local specs = {
		{
			label = "by stem type and gender",
			description = "categorized by stem type and typical gender. " ..
				"Note that \"typical gender\" means the gender that is typical for the " .. sgpos .. "'s ending (e.g. most " .. pos .. " in ''-а'' are " ..
				"feminine, and hence all such " .. pos .. " are considered to be \"typically feminine\"; but some are in fact masculine).",
			parent = by_inflection_type,
			sort = "stem type and gender",
		},
		{
			label = "by stem type, gender and accent pattern",
			description = "categorized by stem type, typical gender and " ..
				"accent pattern. Note that \"typical gender\" means the gender that is typical for the " .. sgpos .. "'s ending (e.g. most " ..
				pos .. " in ''-а'' are feminine, and hence all such " .. pos .. " are considered to be \"typically feminine\"; but some are in " ..
				"fact masculine). See [[Template:uk-ndecl]] for further information on accent patterns.",
			parent = by_inflection_type,
			sort = "stem type, gender and accent pattern",
		},
		{
			label = "by vowel alternation",
			description = "categorized according to their vowel alternation pattern (e.g. ''і'' vs. ''о'').",
			-- Unlike the others, this one sits directly under the part of speech.
			parent = pos,
			sort = "vowel alternation",
		},
		{
			label = "by accent pattern",
			description = "categorized according to their accent pattern (see [[Template:uk-ndecl]]).",
			parent = by_inflection_type,
			sort = "accent pattern",
		},
		{
			label = "with reducible stem",
			description = "with a reducible stem, where an extra vowel is inserted " ..
				"before the last stem consonant in the nominative singular and/or genitive plural.",
			parent = by_inflection_type,
			sort = "reducible stem",
		},
		{
			label = "with multiple stems",
			description = "with multiple stems.",
			parent = by_inflection_type,
			sort = "multiple stems",
		},
		{
			label = "with multiple accent patterns",
			description = "with multiple accent patterns. See [[Template:uk-ndecl]].",
			parent = by_inflection_type,
			sort = "multiple accent patterns",
		},
		{
			label = "with irregular stem",
			description = "with an irregular stem, which occurs in all cases except the nominative singular and maybe the accusative singular.",
			parent = irregular,
			sort = "stem",
		},
		{
			label = "with irregular plural stem",
			description = "with an irregular plural stem, which occurs in all cases.",
			parent = irregular,
			sort = "plural stem",
		},
	}

	for _, spec in ipairs(specs) do
		labels[pos .. " " .. spec.label] = {
			description = "Ukrainian " .. pos .. " " .. spec.description,
			breadcrumb = spec.label,
			parents = {{name = spec.parent, sort = spec.sort}},
		}
	end

	-- "adjectival <pos>" is named differently (prefix instead of suffix) and
	-- has no explicit breadcrumb, so it is registered on its own.
	labels["adjectival " .. pos] = {
		description = "Ukrainian " .. pos .. " with adjectival endings.",
		parents = {pos},
	}
end

-- Maps a noun stem type (as it appears in the category label) to the text that
-- completes the sentence "The stem ends in ...". The adjectival-noun handler
-- also uses membership in this table to decide whether a stem is recognized.
local noun_stem_expl = {
	["hard"] = "a hard consonant",
	-- All three velars are written with a plain hyphen and a Cyrillic letter,
	-- consistent with the other entries.
	["velar-stem"] = "a velar (-к, -г or -х)",
	["semisoft"] = "a hushing consonant (-ш, -ж, -ч or -щ)",
	["soft"] = "a soft consonant",
	["c-stem"] = "-ц",
	["j-stem"] = "conceptual -й",
	["n-stem"] = "-м' (with -ен- in some forms)",
	["t-stem"] = "-я or -а (with -т- in most forms)",
	["possessive"] = "-ов, -єв, -ин or -їн",
	["surname"] = "-ов, -ів, -їв, -єв, -ин, -ін or -їн",
}

-- Traditional declension name for stem types that do not follow the default
-- chosen in get_stem_gender_text() (feminine -> "first", otherwise "second").
-- The "third-declension" entry is not consulted by that function: stems whose
-- name contains "declension" get no declension sentence at all.
local noun_stem_to_declension = {
	["third-declension"] = "third",
	["t-stem"] = "fourth",
	["n-stem"] = "fourth",
}

-- Typical nominative endings, indexed first by gender and then by stem type.
-- Each value is {nominative singular ending, nominative plural ending}, both
-- as display text. Keys with a trailing "-о", "-е" or "-я" (Cyrillic) serve the
-- "... in -X" subcategories: get_stem_gender_text() builds them as
-- stem .. "-" .. ending. A gender/stem combination missing here is treated as
-- an unrecognized category.
local noun_stem_gender_endings = {
    masculine = {
		["hard"]              = {"a hard consonant", "-и"},
		["velar-stem"]        = {"a velar", "-и"},
		["semisoft"]          = {"a hushing consonant or -р", "-і"},
		["soft"]              = {"-ь or -р", "-і"},
		["j-stem"]            = {"-й", "-ї"},
		["hard-о"]            = {"-о", "-и or occasionally -а"},
		["velar-stem-о"]      = {"-о", "-и or occasionally -а"},
		["soft-о"]            = {"-ьо", "-і"},
		["semisoft-о"]        = {"-о", "-и"},
		["semisoft-е"]        = {"-е", "-а"},
	},
    feminine = {
		["hard"]              = {"-а", "-и"},
		["semisoft"]          = {"-а", "-і"},
		["soft"]              = {"-я", "-і"},
		["j-stem"]            = {"-я", "-ї"},
		["third-declension"]  = {"-ь, -р, a labial, or a hushing consonant", "-і"},
		["semisoft-е"]        = {"-е", "-і"},
	},
    neuter = {
		["hard"]              = {"-о", "-а"},
		["velar-stem"]        = {"-о", "-а"},
		["semisoft"]          = {"-е", "-а"},
		["soft"]              = {"-е", "-я"},
		["j-stem"]            = {"-є", "-я"},
		["soft-я"]            = {"-я", "-я"},
		["n-stem"]            = {"-я", "-я"},
		["t-stem"]            = {"-я or -а", "-та"},
	},
}

-- Handler for noun/pronoun/numeral declension categories. The label is tried
-- against the following shapes, in order; the first one that matches AND
-- refers to a known stem/gender combination wins:
--   1. "STEM GENDER adjectival [accent-X ]POSs"
--   2. "STEM GENDER-form accent-X POSs" / "STEM GENDER accent-X POSs in -V"
--   3. "STEM GENDER-form POSs"          / "STEM GENDER POSs in -V"
--   4. "POSs with accent pattern X"
--   5. "POSs with A-B alternation"
-- (V is one of the Cyrillic vowels о, е, я.)
--
-- Reads only data.label. Returns a category spec table, or nothing when no
-- shape applies.
table.insert(handlers, function(data)
	-- Encode apostrophes as HTML entities in displayed text, so that an accent
	-- pattern containing one or more apostrophes is not parsed as wikitext
	-- italic/bold markup. Sort keys keep the raw accent.
	local function escape_accent(accent)
		return rsub(accent, "'", "&#39;")
	end

	-- Pattern fragment matching the "in -V" suffix; captures the vowel.
	local in_ending = "in %-([оея])"

	-- Build the descriptive text for a stem/gender category.
	--   stem:       stem type from the label, e.g. "hard"
	--   genderspec: gender, optionally followed by " in -V", e.g. "masculine in -о"
	--   pos:        singular part of speech, e.g. "noun"
	-- Returns the text, or nil if the gender/stem combination has no entry in
	-- noun_stem_gender_endings.
	local function get_stem_gender_text(stem, genderspec, pos)
		local gender = genderspec
		local ending = rmatch(gender, in_ending .. "$")
		local stemindex = stem
		if ending then
			-- Split "masculine in -о" into gender "masculine" and lookup key "<stem>-о".
			gender = rsub(gender, " " .. in_ending .. "$", "")
			stemindex = stemindex .. "-" .. ending
		end
		if not noun_stem_gender_endings[gender] then
			return nil
		end
		local endings = noun_stem_gender_endings[gender][stemindex]
		if not endings then
			return nil
		end
		local sgending, plending = endings[1], endings[2]
		local stemtext = noun_stem_expl[stem] and " The stem ends in " .. noun_stem_expl[stem] .. "." or ""
		-- Stems already named after a declension (e.g. "third-declension") get
		-- no extra declension sentence.
		local decltext =
			rfind(stem, "declension") and "" or
			" This is traditionally considered to belong to the " .. (
				noun_stem_to_declension[stem] or gender == "feminine" and "first" or "second"
			) .. " declension."
		-- With an explicit "in -V" ending the gender is stated as fact;
		-- otherwise it is only the typical gender for the ending.
		local genderdesc
		if rfind(genderspec, in_ending .. "$") then
			genderdesc = gender .. " " .. pos .. "s"
		else
			genderdesc = "usually " .. gender .. " " .. pos .. "s"
		end
		return stem .. ", " .. genderdesc .. ", normally ending in " .. sgending .. " in the nominative singular" ..
			" and " .. plending .. " in the nominative plural." .. stemtext .. decltext
	end

	-- Shape 1: adjectival nouns, with or without an accent pattern.
	local stem, gender, accent, pos = rmatch(data.label, "^(.*) (.-) adjectival accent%-(.-) (.*)s$")
	if not stem then
		-- No accent in the label; 'accent' stays nil.
		stem, gender, pos = rmatch(data.label, "^(.*) (.-) adjectival (.*)s$")
	end
	if stem and noun_stem_expl[stem] then
		-- Hard stems have separate ending sets depending on stress; all other
		-- stems index adj_decl_endings directly by stem name.
		local stemspec
		if stem == "hard" then
			stemspec = accent == "a" and "hard stem-stressed" or "hard ending-stressed"
		else
			stemspec = stem
		end
		local endings = adj_decl_endings[stemspec]
		if endings then
			local stemtext = " The stem ends in " .. noun_stem_expl[stem] .. "."
			local accentdesc = accent == "a" and
				"This " .. pos .. " is stressed according to accent pattern a (stress on the stem)." or
				accent == "b" and
				"This " .. pos .. " is stressed according to accent pattern b (stress on the ending)." or
				"All " .. pos .. "s of this class are stressed according to accent pattern a (stress on the stem)."
			local accenttext = accent and " accent-" .. accent or ""
			local m, f, n, pl = unpack(endings)
			-- Singular ending for the label's gender; nil for any other gender
			-- word, in which case only the plural ending is mentioned.
			local sg =
				gender == "masculine" and m or
				gender == "feminine" and f or
				gender == "neuter" and n or
				nil
			return {
				description = "Ukrainian " .. stem .. " " .. gender .. " " .. pos ..
				"s, with adjectival endings, ending in " .. (sg and sg .. " in the nominative singular and " or "") ..
				pl .. " in the nominative plural." .. stemtext .. " " .. accentdesc,
				breadcrumb = stem .. " " .. gender .. accenttext,
				parents = {
					{name = "adjectival " .. pos .. "s", sort = stem .. " " .. gender .. accenttext},
					pos .. "s by stem type, gender and accent pattern",
				}
			}
		end
	end

	-- Shape 2: stem + gender + accent pattern. part1 .. part2 is the label
	-- with the accent removed, i.e. the name of the parent category.
	local part1, stem, gender, accent, part2, pos = rmatch(data.label, "^((.-) (.-)%-form) accent%-(.-)( (.*)s)$")
	local ending
	if not stem then
		-- check for e.g. 'Ukrainian hard masculine accent-a nouns in -о'
		part1, stem, gender, accent, part2, pos, ending = rmatch(data.label, "^((.-) ([a-z]+ine)) accent%-(.-)( (.*)s " .. in_ending .. ")$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if not stem then
		-- check for e.g. 'Ukrainian soft neuter accent-a nouns in -я'
		part1, stem, gender, accent, part2, pos, ending = rmatch(data.label, "^((.-) (neuter)) accent%-(.-)( (.*)s " .. in_ending .. ")$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if stem then
		local stem_gender_text = get_stem_gender_text(stem, gender, pos)
		if stem_gender_text then
			local accent_text = " This " .. pos .. " is stressed according to accent pattern " ..
				escape_accent(accent) .. " (see [[Template:uk-ndecl]])."
			return {
				description = "Ukrainian " .. stem_gender_text .. accent_text,
				breadcrumb = "Accent-" .. escape_accent(accent),
				parents = {
					{name = part1 .. part2, sort = accent},
					pos .. "s by stem type, gender and accent pattern",
				}
			}
		end
	end

	-- Shape 3: stem + gender without an accent pattern.
	local stem, gender, pos = rmatch(data.label, "^(.-) (.-)%-form (.*)s$")
	if not stem then
		-- check for e.g. 'Ukrainian hard masculine nouns in -о'
		stem, gender, pos, ending = rmatch(data.label, "^(.-) ([a-z]+ine) (.*)s " .. in_ending .. "$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if not stem then
		-- check for e.g. 'Ukrainian soft neuter nouns in -я'
		stem, gender, pos, ending = rmatch(data.label, "^(.-) (neuter) (.*)s " .. in_ending .. "$")
		if stem then
			gender = gender .. " in -" .. ending
		end
	end
	if stem then
		local stem_gender_text = get_stem_gender_text(stem, gender, pos)
		if stem_gender_text then
			return {
				description = "Ukrainian " .. stem_gender_text,
				-- With an "in -V" ending, 'gender' already carries the suffix;
				-- otherwise restore the "-form" wording of the label.
				breadcrumb = ending and stem .. " " .. gender or stem .. " " .. gender .. "-form",
				parents = {pos .. "s by stem type and gender"},
			}
		end
	end

	-- Shape 4: categories for a single accent pattern.
	local pos, accent = rmatch(data.label, "^(.*)s with accent pattern (.*)$")
	if accent then
		return {
			description = "Ukrainian " .. pos .. "s with accent pattern " .. escape_accent(accent) ..
				" (see [[Template:uk-ndecl]]).",
			breadcrumb = {name = escape_accent(accent), nocap = true},
			parents = {{name = pos .. "s by accent pattern", sort = accent}},
		}
	end

	-- Shape 5: categories for a single vowel alternation "FROM-TO".
	local pos, fromto, altfrom, altto = rmatch(data.label, "^(.*)s with ((.*)%-(.*)) alternation$")
	if altfrom then
		return {
			description = "Ukrainian " .. pos .. "s with vowel alternation between " .. altfrom ..
				" in the lemma and " .. altto .. " in the last syllable of some or all remaining forms.",
			breadcrumb = {name = fromto, nocap = true},
			parents = {{name = pos .. "s by vowel alternation", sort = fromto}},
		}
	end
end)


-- Module result: the fixed label table under LABELS and the list of
-- pattern-matching handler functions under HANDLERS.
return {LABELS = labels, HANDLERS = handlers}