Module:User:Benwing2/category tree/poscatboiler/data/lang-specific/ar


This is a private module sandbox of Benwing2, for their own experimentation. Items in this module may be added and removed at Benwing2's discretion; do not rely on this module's stability.


-- Label specifications keyed by label name; filled in below and returned as LABELS.
local labels = {}
-- Handler functions, each taking a `data` table and returning a label specification or nil;
-- appended to below and returned as HANDLERS.
local handlers = {}

-- Used below for listToSet().
local m_table = require("Module:table")


-----------------------------------------------------------------------------
--                                                                         --
--                           NOUNS AND ADJECTIVES                          --
--                                                                         --
-----------------------------------------------------------------------------


--------------------------------- Noun labels --------------------------------

-- Umbrella category; every derivation-type noun label registered below is a child of it.
labels["nouns by derivation type"] = {
	breadcrumb = "by derivation type",
	parents = {{name = "nouns", sort = "derivation type"}},
	description = "{{{langname}}} nouns categorized by type of derivation.",
}

do
	-- Each entry is {label, description text that follows the language name}. The label is also used
	-- as the sort key under "nouns by derivation type" and as the breadcrumb.
	local noun_derivation_types = {
		{"instance nouns", '[[Appendix:Arabic nominals#Instance nouns|instance nouns]], i.e. nouns having the meaning "an instance of doing X" for some verb.'},
		{"nouns of place", '[[Appendix:Arabic nominals#Nouns of place|nouns of place]], i.e. nouns having the approximate meaning "the place for doing X" for some verb.'},
		{"occupational nouns", "[[Appendix:Arabic nominals#Occupational nouns|occupational nouns]], i.e. nouns referring to people employed in doing something."},
		{"relative nouns (nisba)", "[[Appendix:Arabic nominals#Relative nouns (nisba)|relative (nisba) nouns]], i.e. abstract nouns formed with the suffix {{m|ar|ـِيَّة}} and derived from an adjective or other noun (or occasionally other parts of speech)."},
		{"tool nouns", '[[Appendix:Arabic nominals#Tool nouns|tool nouns]], i.e. nouns having the approximate meaning "tool for doing X" for some verb.'},
	}

	for _, spec in ipairs(noun_derivation_types) do
		local label, description = spec[1], spec[2]
		labels[label] = {
			description = "{{{langname}}} " .. description,
			parents = {{name = "nouns by derivation type", sort = label}},
			breadcrumb = label,
		}
	end
end

do
	-- Wikitext list of the feminine endings, shared by both descriptions below.
	-- NOTE: this literal contains invisible directional marks after some of the templates;
	-- copy it verbatim when editing.
	local feminine_endings = "{{m|ar|ـَة}}‎, {{m|ar||ـَاء}}‎, {{m|ar||ـَا}}‎{{,}} or {{m|ar|ـَى}}"
	-- Extra text shared by both labels.
	local added_by_headword = "It is automatically added by [[Module:ar-headword]] to lemma entries."

	-- Feminine terms whose form does not show one of the feminine endings.
	labels["feminine terms lacking feminine ending"] = {
		description = "{{{langname}}} feminine terms that do not end in the feminine endings " .. feminine_endings .. ".",
		additional = added_by_headword,
		parents = {"nouns", "terms by lexical property", "feminine nouns"},
	}

	-- Masculine terms whose form nevertheless shows one of the feminine endings.
	labels["masculine terms with feminine ending"] = {
		description = "{{{langname}}} masculine terms ending in one of the feminine endings " .. feminine_endings .. ".",
		additional = added_by_headword,
		parents = {"nouns", "terms by lexical property", "masculine nouns"},
	}
end


--------------------------------- Adjective labels --------------------------------

-- Umbrella category for adjectives grouped by derivation type.
-- NOTE(review): no label in this module names this category as a parent; the adjective labels below
-- are parented directly to "adjectives", unlike the noun labels, which hang off
-- "nouns by derivation type". Confirm whether that asymmetry is intended.
labels["adjectives by derivation type"] = {
	breadcrumb = "by derivation type",
	parents = {{name = "adjectives", sort = "derivation type"}},
	description = "{{{langname}}} adjectives categorized by type of derivation.",
}

do
	-- Each entry is {label, short name, description text that follows the language name}. The short
	-- name is used both as the sort key under "adjectives" and as the breadcrumb.
	local adjective_derivation_types = {
		{"characteristic adjectives", "characteristic", '[[Appendix:Arabic nominals#Characteristic nouns and adjectives|characteristic adjectives]], i.e. adjectives meaning "habitually doing X" for some verb.'},
		{"color/defect adjectives", "color/defect", "[[Appendix:Arabic nominals#Color or defect adjectives|color/defect adjectives]], i.e. adjectives generally referring to colors and physical defects."},
		{"relative adjectives (nisba)", "relative (nisba)", '[[Appendix:Arabic nominals#Relative adjectives (nisba)|relative (nisba) adjectives]], i.e. adjectives formed with the suffix {{m|ar|ـِيّ}} and meaning "related to X" for some noun (or occasionally other parts of speech).'},
	}

	for _, spec in ipairs(adjective_derivation_types) do
		local label, short_name, description = spec[1], spec[2], spec[3]
		labels[label] = {
			description = "{{{langname}}} " .. description,
			parents = {{name = "adjectives", sort = short_name}},
			breadcrumb = short_name,
		}
	end
end


--------------------------------- Noun/adjective handlers --------------------------------

-- Parts of speech (in the singular) for which the handler below may fire.
local allowed_pos = m_table.listToSet {"noun", "pronoun", "numeral", "adjective"}
-- The handler below fires only if at least one of these occurs as a whole space-delimited word.
local required_words = {"triptote", "diptote", "singular", "plural", "dual", "paucal", "singulative", "collective"}

-- Handler for labels of the form "<POS>s with <TYPE>", e.g. "nouns with broken plural".
-- Returns nil (declining the label) unless <POS> is in `allowed_pos` and <TYPE> contains one of
-- `required_words`. Otherwise returns a label specification parented to "<POS>s by inflection type",
-- plus "<POS>s with broken plural" and/or "irregular <POS>s" when <TYPE> mentions them.
table.insert(handlers, function(data)
	local pos, typ = data.label:match("^([a-z]+)s with (.+)$")
	if not (pos and allowed_pos[pos]) then
		return nil
	end

	-- Surround with spaces so that the search below only hits whole words.
	local padded_typ = " " .. typ .. " "
	local has_required_word = false
	for i = 1, #required_words do
		if padded_typ:find(" " .. required_words[i] .. " ", 1, true) then
			has_required_word = true
			break
		end
	end
	if not has_required_word then
		return nil
	end

	local plural_pos = pos .. "s"
	local parents = {{name = plural_pos .. " by inflection type", sort = typ}}
	-- A more specific broken-plural type is also filed under the general broken-plural category.
	if typ ~= "broken plural" and typ:find("broken plural", 1, true) then
		parents[#parents + 1] = {name = plural_pos .. " with broken plural", sort = typ}
	end
	if typ:find("irregular", 1, true) then
		parents[#parents + 1] = {name = "irregular " .. plural_pos, sort = typ}
	end

	return {
		description = "{{{langname}}} " .. data.label .. ".",
		breadcrumb = typ,
		parents = parents,
	}
end)



-----------------------------------------------------------------------------
--                                                                         --
--                                   VERBS                                 --
--                                                                         --
-----------------------------------------------------------------------------


--------------------------------- Verb labels --------------------------------

-- Verbs built on four-radical roots; the quadriliteral form categories are filed under it by the
-- form handler further down.
labels["verbs with quadriliteral roots"] = {
	breadcrumb = "with quadriliteral roots",
	parents = {{name = "verbs by inflection type", sort = "quadriliteral roots"}},
	description = "{{{langname}}} verbs built on roots consisting of four radicals (instead of the more common triliteral roots), categorized by form.",
}

-- Umbrella category for the "<weakness> verbs by conjugation" labels.
labels["verbs by conjugation"] = {
	breadcrumb = "by conjugation",
	parents = {{name = "verbs by inflection type", sort = "conjugation"}},
	description = '{{{langname}}} verbs categorized by type of weakness displayed in their conjugation (as opposed to weakness determined by form, i.e. by the presence of certain "weak" radicals in certain positions).',
}

-- Umbrella category for the passive-type labels generated below.
labels["verbs by type of passive"] = {
	breadcrumb = "by type of passive",
	parents = {{name = "verbs", sort = "type of passive"}},
	description = "{{{langname}}} verbs categorized by type of passive available.",
}

do
	-- Each entry is {passive type, description text that follows the language name}. The label is
	-- "verbs with <passive type>"; the passive type itself serves as sort key and breadcrumb.
	local passive_types = {
		{"full passive", "verbs with passive forms in all persons and numbers."},
		{"impersonal passive", "verbs with impersonal passive forms only, i.e. only in the third-person masculine singular."},
		{"no passive", "verbs without passive forms."},
	}

	for _, spec in ipairs(passive_types) do
		local passive_type, description = spec[1], spec[2]
		labels["verbs with " .. passive_type] = {
			description = "{{{langname}}} " .. description,
			parents = {{name = "verbs by type of passive", sort = passive_type}},
			breadcrumb = passive_type,
		}
	end
end


--------------------------------- Verb handlers --------------------------------

-- Wikitext fragments for the radicals mentioned repeatedly in the descriptions below.
local waw_or_ya = "{{lang|ar|و}} or {{lang|ar|ي}}"
local hamza = "{{lang|ar|ء}}"

-- Maps each weakness type (by form) to the clause describing its roots; the clause is inserted after
-- "... roots, where " in the category descriptions built further down.
local weakness_desc = {
	geminate = "the second and third radicals are identical, which sometimes causes an intervening short vowel to drop",
	assimilated = "the first radical is " .. waw_or_ya .. ", which disappears in some forms",
	hollow = "the second radical is " .. waw_or_ya .. ", which is replaced with a long or short vowel in some forms",
	["third-weak"] = "the third of four radicals is " .. waw_or_ya .. " (normally not leading to significant irregularities)",
	["final-weak"] = "the last radical is " .. waw_or_ya .. ", normally leading to irregular endings",
	["assimilated+final-weak"] = "the first radical is " .. waw_or_ya .. " and the last radical is " .. waw_or_ya .. ", normally leading to irregular endings",
	sound = "none of the radicals is " .. waw_or_ya .. " or " .. hamza .. ", nor are the second and third radicals identical",
	hamzated = "one of the radicals is " .. hamza .. ", leading to spelling and occasionally conjugation irregularities",
}

-- Display text for weakness types whose internal name does not read well as English; types not
-- listed here are displayed under their own name.
local weakness_english = {
	["assimilated+final-weak"] = "both assimilated and final-weak",
}

-- Maps each weakness type (by conjugation) to the explanatory sentences appended to the description
-- of the corresponding "<weakness> verbs by conjugation" category. Each text contrasts the
-- by-conjugation category with the like-named by-form category.
local weakness_desc_by_conjugation = {
	["geminate"] = "This includes verbs where the second and third radicals are identical and the vowel between them is deleted in some parts of the conjugation. This is not the same as [[:Category:Arabic geminate verbs|geminate verbs]] by form, which is determined purely by the second and third radicals being identical, regardless of the conjugation. (For example, form-II verbs that are geminate by form are sound by conjugation.)",
	["assimilated"] = "Generally this only includes form-I verbs where the first radical is {{lang|ar|و}}, leading to a shortened non-past stem. This is not the same as [[:Category:Arabic assimilated verbs|assimilated verbs]] by form, which is determined purely by the first radical being {{lang|ar|و}} or {{lang|ar|ي}}, regardless of the conjugation. (All verbs that are assimilated by form but not form-I are sound by conjugation, as are form-I verbs whose first radical is {{lang|ar|ي}}, and a few form-I verbs whose first radical is {{lang|ar|و}}.)",
	["hollow"] = "This includes verbs where the second radical is {{lang|ar|و}} or {{lang|ar|ي}} and appears as a vowel in most parts of the conjugation. This is not the same as [[:Category:Arabic hollow verbs|hollow verbs]] by form, which is determined only by the second radical being {{lang|ar|و}} or {{lang|ar|ي}}, regardless of the conjugation. (For example, form-II verbs that are hollow by form are sound by conjugation.)",
	-- Fixed a duplicated word ("the the last radical") in this user-facing text.
	["final-weak"] = "This includes verbs where the last radical is {{lang|ar|و}} or {{lang|ar|ي}}, leading to irregular endings. This is not the same as [[:Category:Arabic final-weak verbs|final-weak verbs]] by form, which is determined only by the last radical being {{lang|ar|و}} or {{lang|ar|ي}}, regardless of the conjugation, although the two categories largely coincide.",
	["assimilated+final-weak"] = "Generally this only includes form-I verbs where the first radical is {{lang|ar|و}} and the last radical is {{lang|ar|و}} or {{lang|ar|ي}}, leading to irregular endings and a shortened non-past stem. This is not the same as verbs that are [[:Category:Arabic assimilated verbs|assimilated]] and [[:Category:Arabic final-weak verbs|final-weak]] by form, which is determined purely by both the first and last radical being {{lang|ar|و}} or {{lang|ar|ي}}, regardless of the conjugation. (All verbs that are assimilated+final-weak by form but not form-I are just final-weak by conjugation, as are form-I verbs whose first radical is {{lang|ar|ي}}.)",
	["sound"] = "This includes regular verbs without any irregularities caused by weak ({{lang|ar|و}} or {{lang|ar|ي}}) radicals. This is not the same as [[:Category:Arabic sound verbs|sound verbs]] by form, which is determined purely by lacking any weak radicals, regardless of the conjugation. Some verbs with weak radicals are nonetheless sound by conjugation; an example is form-II verbs that are [[:Category:Arabic hollow verbs|hollow verbs]] by form, i.e. with the second radical being {{lang|ar|و}} or {{lang|ar|ي}}.",
}

-- Maps the Roman numeral of each triliteral verb form (I through XV) to its number.
local trilit_form_to_number = {}
for number, form in ipairs {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X", "XI", "XII", "XIII", "XIV", "XV"} do
	trilit_form_to_number[form] = number
end

-- Maps the name of each quadriliteral verb form (Iq through IVq) to its number.
local quadlit_form_to_number = {}
for number, form in ipairs {"Iq", "IIq", "IIIq", "IVq"} do
	quadlit_form_to_number[form] = number
end

-- Convert a verb form name such as "VIII" or "IIq" to a category sort key.
-- If `with_space` is truthy, the key is a space followed by the zero-padded two-digit form number,
-- with a trailing "q" for quadriliteral forms (e.g. " 08", " 02q"). Otherwise the key is just the
-- unpadded form number, with no "q" even for quadriliteral forms (e.g. "8", "2").
-- Returns nil if `form` is not a recognized form name.
local function form_to_sort_key(form, with_space)
	local number = trilit_form_to_number[form]
	local suffix = ""
	if not number then
		number = quadlit_form_to_number[form]
		suffix = "q"
	end
	if not number then
		return nil
	end
	if with_space then
		return (" %02d"):format(number) .. suffix
	end
	return tostring(number)
end

-- Return a wikilink to the section of [[Appendix:Arabic verbs]] for the given verb form,
-- displayed as "form-<FORM>".
local function form_link(form)
	local target = "Appendix:Arabic verbs#Form " .. form
	local display = "form-" .. form
	return "[[" .. target .. "|" .. display .. "]]"
end

-- Weakness types whose link text does not follow the generic "Weak verbs" pattern.
local fixed_weakness_links = {
	hamzated = "[[Appendix:Arabic verbs#Hamzated verbs|hamzated]]",
	geminate = "[[Appendix:Arabic verbs#Geminate verbs|geminate]]",
	-- "sound" is left as plain, unlinked text.
	sound = "sound",
}

-- Return the wikitext used to mention a weakness type in a category description.
-- Hamzated and geminate link to their own appendix sections, "sound" is returned unlinked, and every
-- other type links to the "Weak verbs" section, displayed via `weakness_english` when that table has
-- an entry for it and under its own name otherwise.
local function weakness_link(weakness)
	local fixed = fixed_weakness_links[weakness]
	if fixed then
		return fixed
	end
	local display = weakness_english[weakness] or weakness
	return "[[Appendix:Arabic verbs#Weak verbs|" .. display .. "]]"
end

-- Labels for e.g. [[:Category:Arabic final-weak verbs]], one per entry in `weakness_desc`. These are
-- registered as labels rather than produced by a handler so that they show up as children of
-- [[:Category:Arabic verbs by inflection type]].
for weakness, root_property in pairs(weakness_desc) do
	local label = weakness .. " verbs"
	local description = ("{{{langname}}} verbs with %s roots, where %s."):format(
		weakness_link(weakness), root_property)
	labels[label] = {
		description = description,
		parents = {{name = "verbs by inflection type", sort = weakness}},
		breadcrumb = weakness,
	}
end

-- Labels for e.g. [[:Category:Arabic final-weak verbs by conjugation]], one per entry in
-- `weakness_desc_by_conjugation`. These are registered as labels rather than produced by a handler
-- so that they show up as children of [[:Category:Arabic verbs by conjugation]].
for weakness, explanation in pairs(weakness_desc_by_conjugation) do
	local label = weakness .. " verbs by conjugation"
	local description = ("{{{langname}}} verbs conjugated as %s. %s"):format(
		weakness_link(weakness), explanation)
	labels[label] = {
		description = description,
		parents = {{name = "verbs by conjugation", sort = weakness}},
		breadcrumb = weakness,
	}
end

-- Handler for e.g. [[:Category:Arabic form-VIII verbs]].
-- Declines (returns nil) unless the label is "form-<FORM> verbs" with a recognized form. The category
-- is filed under "verbs by inflection type" and, for quadriliteral forms (those ending in "q"), also
-- under "verbs with quadriliteral roots", using the space-prefixed zero-padded sort key in both.
table.insert(handlers, function(data)
	local form = data.label:match("^form%-([IVX]+q?) verbs$")
	local sort_key = form and form_to_sort_key(form, "with space")
	if not sort_key then
		return nil
	end

	local parents = {{name = "verbs by inflection type", sort = sort_key}}
	local is_quadriliteral = form:sub(-1) == "q"
	if is_quadriliteral then
		parents[#parents + 1] = {name = "verbs with quadriliteral roots", sort = sort_key}
	end

	return {
		description = "{{{langname}}} " .. form_link(form) .. " verbs.",
		parents = parents,
		breadcrumb = "form " .. form,
	}
end)

-- Handler for e.g. [[:Category:Arabic final-weak form-VIII verbs]].
-- Declines (returns nil) unless the label is "<WEAKNESS> form-<FORM> verbs" where <WEAKNESS> is a key
-- of `weakness_desc` and <FORM> is a recognized form. The category is filed under both
-- "form-<FORM> verbs" (sorted by weakness) and "<WEAKNESS> verbs" (sorted by form number).
table.insert(handlers, function(data)
	local weakness, form = data.label:match("^([a-z+-]+) form%-([IVX]+q?) verbs$")
	local root_property = weakness and weakness_desc[weakness]
	if not root_property then
		return nil
	end
	local sort_key = form_to_sort_key(form)
	if not sort_key then
		return nil
	end

	local form_category = "form-" .. form .. " verbs"
	local weakness_category = weakness .. " verbs"
	return {
		description = ("{{{langname}}} %s verbs with %s roots, where %s."):format(
			form_link(form), weakness_link(weakness), root_property),
		parents = {
			{name = form_category, sort = weakness},
			{name = weakness_category, sort = sort_key},
		},
		breadcrumb = weakness,
	}
end)

-- Ordinals accepted as a radical position by the handler below.
local radical_ordinals = m_table.listToSet {"first", "second", "third", "fourth"}
-- Radicals accepted by the handler below.
local weak_radicals = m_table.listToSet {"و", "ي", "ء"}

-- Handler for e.g. [[:Category:Arabic form-IV verbs with و as second radical]].
-- Declines (returns nil) unless the label is "form-<FORM> verbs with <RADICAL> as <ORDINAL> radical"
-- with a recognized form, a radical from `weak_radicals` and an ordinal from `radical_ordinals`.
-- The weakness type is derived from the radical and its position, and the category is filed under
-- "<WEAKNESS> form-<FORM> verbs" and "<WEAKNESS> verbs".
table.insert(handlers, function(data)
	-- mw.ustring is used so that "." captures one whole (multibyte) character.
	local form, radical, ordinal = mw.ustring.match(data.label, "^form%-([IVX]+q?) verbs with (.) as ([a-z]+) radical$")
	if not form then
		return nil
	end
	local sort_key = form_to_sort_key(form)
	if not sort_key then
		return nil
	end
	if not (weak_radicals[radical] and radical_ordinals[ordinal]) then
		return nil
	end

	local weakness
	if radical == "ء" then
		weakness = "hamzated"
	elseif ordinal == "first" then
		weakness = "assimilated"
	elseif ordinal == "second" then
		weakness = "hollow"
	elseif ordinal == "third" and form:match("q$") then
		-- Only a quadriliteral form has a third radical that is not the last one.
		weakness = "third-weak"
	else
		weakness = "final-weak"
	end

	return {
		description = ("{{{langname}}} %s verbs with %s roots having {{lang|ar|%s}} as their %s radical."):format(
			form_link(form), weakness_link(weakness), radical, ordinal),
		parents = {
			{name = weakness .. " form-" .. form .. " verbs", sort = " "},
			{name = weakness .. " verbs", sort = sort_key .. radical},
		},
		-- NOTE(review): the breadcrumb names the form rather than the radical/position that
		-- distinguishes this category from its first parent -- confirm this is intended.
		breadcrumb = "form " .. form,
	}
end)


-- Export the label table and the handler list built above.
return {LABELS = labels, HANDLERS = handlers}