Module:typing-aids/data/sd

This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

-- Table returned by this module; the replacement tables are stored in it by key.
local data = {}

-- Shorthand for building a string from Unicode code points.
local U = mw.ustring.char

-- Combining acute accent (U+0301).
local acute = U(0x301)

-- Khudawadi combining signs, built from their code points.
local nuktA = U(0x112E9)
local virAma = U(0x112EA)
local anusvAra = U(0x112DF)

-- The consonant letters, followed by a pattern that matches any one of them
-- together with an optional trailing nukta.
local consonants = "𑊺𑊻𑊼𑊽𑊾𑊿𑋀𑋁𑋂𑋃𑋄𑋅𑋆𑋇𑋈𑋉𑋊𑋋𑋌𑋍𑋎𑋏𑋐𑋑𑋒𑋓𑋔𑋕𑋖𑋗𑋘𑋙𑋚𑋛𑋜𑋝𑋞"
local consonant = table.concat({ "[", consonants, "]", nuktA, "?" })

-- Ordered replacement rules that turn a Latin transliteration into Khudawadi script.
-- Each entry is a {pattern, replacement} pair. The position of an entry in the list
-- matters, as the notes on the individual groups explain (presumably the consuming
-- module applies the entries in sequence; confirm against the caller).
data["sd"] = {
	-- Vowels and modifiers. Do the diphthongs and diaereses first.
	{"ai", "𑊷"},
	{"au", "𑊹"},
	{"ä", "𑊰"},
	{"ö", "𑊸"},
	{"ï", "𑊲"},
	{"ü", "𑊴"},
	{"a", "𑊰"},
	{"ā", "𑊱"},
	{"i", "𑊲"},
	{"ī", "𑊳"},
	{"u", "𑊴"},
	{"ū", "𑊵"},
	{"e", "𑊶"},
	{"o", "𑊸"},
	{"(𑊰)[%-/]([𑊲𑊴])", "%1%2"},		-- a-i, a-u for 𑊰𑊲, 𑊰𑊴; must follow rules for "ai", "au"

	-- Two-letter consonants must go before h.
	{"kh", "𑊻"},
	{"gh", "𑊾"},
	{"ch", "𑋁"},
	{"jh", "𑋄"},
	{"ṭh", "𑋇"},
	{"ḍh", "𑋋"},
	{"th", "𑋎"},
	{"dh", "𑋐"},
	{"ph", "𑋓"},
	{"bh", "𑋖"},
	{"h", "𑋞"},

	-- Implosives. These carry a combining mark, so they must go before the plain stops.
	{"g̈", "𑊽"},
	{"j̈", "𑋃"},
	{"d̤", "𑋉"},
	{"b̤", "𑋕"},

	-- Consonants with nukta. The output is a base letter followed by the nukta sign.
	{"q", "𑊺𑋩"},
	{"x", "𑊻𑋩"},
	{"ġ", "𑊼𑋩"},
	{"z", "𑋂𑋩"},
	{"f", "𑋓𑋩"},

	-- Other stops.
	{"k", "𑊺"},
	{"g", "𑊼"},
	{"c", "𑋀"},
	{"j", "𑋂"},
	{"ṭ", "𑋆"},
	{"ḍ", "𑋈"},
	{"ṛ", "𑋊"},
	{"t", "𑋍"},
	{"d", "𑋏"},
	{"p", "𑋒"},
	{"b", "𑋔"},

	-- Nasals.
	{"ṅ", "𑊿"},
	{"ñ", "𑋅"},
	{"ṇ", "𑋌"},
	{"n", "𑋑"},
	{"m", "𑋗"},

	-- Remaining consonants.
	{"y", "𑋘"},
	{"r", "𑋙"},
	{"l", "𑋚"},
	{"v", "𑋛"},
	{"ś", "𑋜"},
	{"s", "𑋝"},

	{"ṃ", anusvAra},

	-- This rule must be applied twice because a consonant may only be in one capture per operation,
	-- so "CCC" will only recognize the first two consonants. Must follow all consonant conversions.
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	{"(" .. consonant .. ")(" .. consonant .. ")", "%1" .. virAma .. "%2"},
	-- A consonant at the very end of the text also receives a virama.
	{"(" .. consonant .. ")$", "%1" .. virAma},
	-- Drop any combining acute accents.
	{acute, ""},
}

-- Independent vowel letters (keys) paired with the vowel signs (values) that
-- take their place when the vowel directly follows a consonant.
local vowels = {
	["𑊲"] = U(0x112E1),
	["𑊴"] = U(0x112E3),
	["𑊶"] = U(0x112E5),
	["𑊸"] = U(0x112E7),
	["𑊱"] = U(0x112E0),
	["𑊳"] = U(0x112E2),
	["𑊵"] = U(0x112E4),
	["𑊷"] = U(0x112E6),
	["𑊹"] = U(0x112E8),
}

do
	local rules = data["sd"]
	-- Capture group matching one consonant (with optional nukta).
	local capturedConsonant = "(" .. consonant .. ")"

	-- Convert independent vowels to diacritics after consonants. Must go after all consonant conversions.
	for letter, sign in pairs(vowels) do
		rules[#rules + 1] = {capturedConsonant .. letter, "%1" .. sign}
	end

	-- This must go last, after independent vowels are converted to diacritics, or "aï", "aü" won't work.
	-- The letter 𑊰 after a consonant is simply removed, leaving the bare consonant.
	rules[#rules + 1] = {capturedConsonant .. "𑊰", "%1"}
end

-- [[w:Harvard-Kyoto]] to [[w:International Alphabet of Sanskrit Transliteration]]
-- Two lookup tables stored at indices 1 and 2 (presumably consulted in that order
-- by the consuming module; confirm against the caller).
data["sd-tr"] = {
	-- Index 1: single ASCII characters mapped to their diacritic-bearing counterparts.
	{
		A = "ā",
		I = "ī",
		U = "ū",
		J = "ñ",
		T = "ṭ",
		D = "ḍ",
		N = "ṇ",
		G = "ṅ",
		S = "ś",
		M = "ṃ",

		["/"] = acute,
	},
	-- Index 2: underscore-delimited sequences, plus "R".
	{
		_gh_ = "ġ",
		_g_ = "g̈",
		_j_ = "j̈",
		_d_ = "d̤",
		_b_ = "b̤",
		R = "ṛ",
	},
}

return data