Module:qinlian-pron

This module page is experimental.
The details of its operation have not yet been fully decided upon. Do not deploy widely until the module page is finished.
This module needs documentation.
Please document this module by describing its purpose and usage on the documentation page.

local export = {}

local ugsub = mw.ustring.gsub
local split = mw.text.split

-- Tone digit -> replacement tone digit used after a -p/-t/-k stop
-- (see the "([ptk])([136])" rewrites elsewhere in this module).
-- Keys and values are strings because they come from pattern captures.
local entering_tones = {
	["1"] = "7",
	["3"] = "8",
	["6"] = "9",
}

-- Ordered {pattern, replacement} pairs that jyutping_to_ipa applies with
-- ugsub, one after another, before a syllable is split into
-- initial/nucleus/coda. The order is significant: every "a" is first
-- rewritten to "ă", and only then is "ăă" collapsed back to "a".
local ipa_preprocess = {
	{"a", "ă"},
	{"yu", "y"},
	{"ăă", "a"},
	{"uk", "ŭk"},
	{"ik", "ĭk"},
	{"ou", "ŏu"},
	{"eoi", "eoy"},
	{"ung", "ŭng"},
	{"ing", "ĭng"},
	{"ei", "ĕi"},
	{"oi", "oy"},
	{"ui", "uy"},
}

-- Romanised initial (onset) -> IPA. The empty string is a valid key so that
-- syllables without an initial can be looked up without a special case.
local ipa_initial = {
	-- labials
	["b"] = "p",
	["p"] = "pʰ",
	["m"] = "m",
	["f"] = "f",
	-- coronals
	["d"] = "t",
	["t"] = "tʰ",
	["n"] = "n",
	["l"] = "l",
	["sl"] = "ɬ",
	["nj"] = "ɲ̟",
	-- velars
	["g"] = "k",
	["k"] = "kʰ",
	["ng"] = "ŋ",
	["gw"] = "kʷ",
	["kw"] = "kʷʰ",
	-- sibilants
	["zh"] = "t͡ʃ",
	["ch"] = "t͡ʃʰ",
	["sh"] = "ʃ",
	["z"] = "t͡s",
	["c"] = "t͡sʰ",
	["s"] = "s",
	-- glottal and glides
	["h"] = "h",
	["w"] = "w",
	["j"] = "j",
	-- no initial
	[""] = "",
}

-- Nucleus (after ipa_preprocess has been applied) -> IPA.
-- Breve-marked keys are the variants produced by ipa_preprocess.
local ipa_nucleus = {
	["a"] = "ä", ["ă"] = "ɐ",
	["e"] = "ɛ", ["ĕ"] = "e",
	["i"] = "i", ["ĭ"] = "ɪ",
	["o"] = "ɔ", ["ŏ"] = "o",
	["oe"] = "œ", ["eo"] = "ɵ",
	["u"] = "u", ["ŭ"] = "ʊ",
	-- ipa_preprocess rewrites "yu" to "y", and the nucleus capture in
	-- jyutping_to_ipa is a single vowel plus an optional e/o, so the key that
	-- is actually looked up is "y". Without it every "-yu" syllable raised
	-- "Unrecognised nucleus".
	["y"] = "yː",
	-- Kept for backward compatibility with any direct lookups by "yu".
	["yu"] = "yː"
}

-- Coda -> IPA. The empty string is a valid key for open syllables.
-- Stops are written with a single "no audible release" mark (U+031A).
local ipa_coda = {
	["i"] = "i̯", ["u"] = "u̯", ["y"] = "y̯",
	["m"] = "m", ["n"] = "n", ["ng"] = "ŋ",
	-- "k" was missing although the coda pattern in jyutping_to_ipa captures
	-- it (and ipa_preprocess produces "ŭk"/"ĭk"); "t" carried the release
	-- mark twice.
	["p"] = "p̚", ["t"] = "t̚", ["k"] = "k̚",
	[""] = "",
}

-- Tone digit (as a string) -> IPA tone letters. Only tones 1-4 have entries.
-- The value for tone 4 is an HTML span whose tooltip reads "or 11".
local ipa_tone = {
	["1"] = "⁴⁵",
	["2"] = "²⁴",
	["3"] = "³³",
	["4"] = '<span style="cursor:help" title="or 11">²²</span>',
}

-- Tone-change marker found between two tone digits -> IPA separator.
-- Both "*" and "-" yield the same superscript minus; "" means no change.
local ipa_tone_sandhi = {
	["*"] = "⁻",
	["-"] = "⁻",
	[""] = "",
}

-- Syllabic consonants (syllables without a vowel) -> IPA.
local ipa_syllabic = {
	["m"] = "m̩",
	["ng"] = "ŋ̍",
}

-- Plain vowel -> accented vowel, used by yale_tone as gsub replacement
-- tables (the matched vowel is the lookup key).

-- Acute accent.
local acute_accents = {
	a = "á",
	e = "é",
	i = "í",
	o = "ó",
	u = "ú",
}

-- Grave accent.
local grave_accents = {
	a = "à",
	e = "è",
	i = "ì",
	o = "ò",
	u = "ù",
}

-- Macron.
local macrons = {
	a = "ā",
	e = "ē",
	i = "ī",
	o = "ō",
	u = "ū",
}

-- Jyutping final -> Yale final, for the finals that are spelled differently.
-- Finals mapped to the sentinel string "沒有耶魯拼音" are detected by
-- jyutping_to_yale, which then replaces the whole reading with
-- "colloquial sounds not defined".
local yale_final = {
	["aa"] = "a",

	["eu"] = "沒有耶魯拼音",
	["em"] = "沒有耶魯拼音",
	["ep"] = "沒有耶魯拼音",
	["et"] = "沒有耶魯拼音",

	["oe"] = "eu",
	["oeng"] = "eung",
	["oek"] = "euk",
	["eoi"] = "eui",
	["eon"] = "eun",
	["eot"] = "eut",

	[""] = "",
}

-- Pattern -> replacement pairs consumed by jyutping_to_cantonese_pinyin
-- (each pair is handed to ugsub). String replacements may use %1; the two
-- function replacements swap the tone digit after a -p/-t/-k stop for its
-- entering_tones counterpart.
local canton_pinyin_regex = {
	["yu"] = "y",
	["eo"] = "oe",
	["eoi"] = "oey",
	["z(h?)"] = "dz%1",
	["c(h?)"] = "ts%1",
	-- stop directly followed by tone 1/3/6
	["([ptk])([136])"] = function(stop, tone)
		return stop .. entering_tones[tone]
	end,
	-- stop + tone + sandhi marker, followed by a final tone 1/3/6
	["([ptk][1-6][%-%*])([136])$"] = function(prefix, tone)
		return prefix .. entering_tones[tone]
	end,
}

--- Apply a Yale tone marking to a final.
-- Tones 4-6 get an "h" inserted before the trailing consonant cluster
-- (or appended when there is none). Tone 1 puts a macron, tone 4 a grave
-- and tones 2/5 an acute on the first plain vowel. Tones 3 and 6 add no
-- diacritic.
-- @param final string  the final (rime) to decorate
-- @param b string      tone digit as a string
-- @return string       the decorated final
local function yale_tone(final, b)
	local low_register = (b == "4" or b == "5" or b == "6")
	if low_register then
		final = (final:gsub("([ptkmn]?g?)$", "h%1", 1))
	end

	local accent_map
	if b == "1" then
		accent_map = macrons
	elseif b == "4" then
		accent_map = grave_accents
	elseif b == "2" or b == "5" then
		accent_map = acute_accents
	end

	if accent_map then
		-- only the first vowel carries the diacritic
		final = (final:gsub("[aeiou]", accent_map, 1))
	end
	return final
end

--- Convert a Jyutping-style transcription to IPA.
--
-- Commas that are not followed by a space separate alternative readings;
-- spaces separate syllables. Each syllable is either a syllabic consonant
-- (optional "h" + m/n/ng) or initial + nucleus + coda, followed by a tone
-- digit and optionally a sandhi marker ("-" or "*") plus a second tone digit.
--
-- @param text string|table  the transcription, or a frame-like table whose
--                           `args[1]` holds it
-- @return string  the readings joined with "/, /" (no outer slashes added)
-- Raises an error for digits 7-9, capital letters, a hyphen followed by a
-- letter, a digit directly followed by a letter, a malformed syllable, or
-- any initial/nucleus/coda/tone that has no entry in the lookup tables.
-- NOTE(review): the pattern rewrites tones 1/3/6 after -p/-t/-k to 7/8/9,
-- and the validation accepts tones 5 and 6; whether those syllables convert
-- depends on ipa_tone having entries for them — confirm the intended values.
function export.jyutping_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end

	if text:find("[7-9]") then error("Invalid tone in Jyutping.") end
	if text:find("[A-Z]") then error("Please do not capitalize the Jyutping.") end
	if text:find("%-[A-Za-z]") then error("Please do not hyphenate the Jyutping.") end
	if text:find("[0-9][a-z]") then error("Error in the Jyutping transcription.") end

	text = text:lower()
	text = text:gsub("jy([^u])", "j%1")
	text = text:gsub("%.%.%.", " ")
	-- Use a placeholder so that only commas NOT followed by a space split readings.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local reading = split(text, "隔")

	-- Look up a mandatory tone digit.
	local function base_tone(t)
		return ipa_tone[t] or error(("Unrecognised tone: \"%s\""):format(t))
	end

	-- Look up the optional sandhi-target tone. The pattern captures "" when
	-- no second tone digit is present; that must yield "" rather than an
	-- error, otherwise every syllable without tone change would fail.
	local function target_tone(t)
		if t == "" then return "" end
		return base_tone(t)
	end

	-- Syllabic consonant: (h?)(m|n|ng)(tone)(sandhi?)(tone?)
	local function retrieve_ipa1(a,b,c,d,e)
		return ipa_initial[a] ..
			(ipa_syllabic[b] or error(("Unrecognised syllabic consonant: \"%s\""):format(b))) ..
			base_tone(c) ..
			ipa_tone_sandhi[d] ..
			target_tone(e)
	end

	-- Regular syllable: (initial)(nucleus)(coda)(tone)(sandhi?)(tone?)
	local function retrieve_ipa2(a,b,c,d,e,f)
		return (ipa_initial[a] or error(("Unrecognised initial: \"%s\""):format(a))) ..
			(ipa_nucleus[b] or error(("Unrecognised nucleus: \"%s\""):format(b))) ..
			(ipa_coda[c] or error(("Unrecognised coda: \"%s\""):format(c))) ..
			base_tone(d) ..
			ipa_tone_sandhi[e] ..
			target_tone(f)
	end

	local function get_entering_tone(a,b) return a .. entering_tones[b] end

	for i = 1, #reading do
		-- Insert a missing space between a tone digit and the next syllable
		-- (keeping the letter), then drop every character that cannot be
		-- part of a syllable.
		reading[i] = reading[i]:gsub("([1-6])([a-z])", "%1 %2"):gsub("[^a-z1-6%-%* ]", "")
		local syllable = split(reading[i]:lower(), " ")
		for j = 1, #syllable do
			local syl = syllable[j]
			syl = syl:gsub("([zcs])yu", "%1hyu")
			syl = syl:gsub("([zc])oe", "%1hoe")
			syl = syl:gsub("([zc])eo", "%1heo")

			local well_formed =
				syl:find("^[bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?[aeiouy]+[mnptk]?g?[1-9][%-%*]?[1-9]?$")
				or syl:find("^h?[mn]g?[1-9][%-%*]?[1-9]?$")
			-- Pieces without any letter or digit (e.g. "") are passed through.
			if not well_formed and syl:find("[a-z1-9]") then
				error("Incorrect Jyutping format. Please check!")
			end

			syl = syl:gsub("^(h?)([mn]g?)([1-6])([%-%*]?)([1-6]?)$", retrieve_ipa1)
			syl = syl:gsub("([ptk])([136])", get_entering_tone)
			syl = syl:gsub("([ptk][1-9][%-%*])([136])$", get_entering_tone)
			for _, regex_pair in ipairs(ipa_preprocess) do
				syl = ugsub(syl, regex_pair[1], regex_pair[2])
			end
			-- The "a" -> "ă" preprocessing also hits the HTML "span" tag that
			-- retrieve_ipa1 may already have inserted; undo that.
			syl = ugsub(syl, "spăn", "span")
			syl = ugsub(syl,
				"([bcdfghjklmnpqrstvwxyz]?[bcdfghjklmnpqrstvwxyz]?)([aăeĕiĭoŏuŭy][eo]?)([iuymngptk]?g?)([1-9])([%-%*]?)([1-9]?)",
				retrieve_ipa2)
			syllable[j] = syl
		end
		reading[i] = table.concat(syllable, " ")
	end
	return table.concat(reading, "/, /")
end

--- Convert a Jyutping-style transcription to Yale romanisation.
--
-- Commas that are not followed by a space separate alternative readings;
-- spaces separate syllables. A tone digit followed by "-" (the pre-sandhi
-- tone) is dropped, so only the changed tone is rendered.
--
-- @param text string|table  the transcription, or a frame-like table whose
--                           `args[1]` holds it
-- @return string  the readings joined with ", ". A reading containing a
--                 final that yale_final maps to its "no Yale spelling"
--                 sentinel is replaced by "colloquial sounds not defined".
--                 First-tone vowels and syllabic m/ng are wrapped in an HTML
--                 span whose tooltip offers the grave-accent alternative.
-- Raises "wrong usage of 'jy' in Jyutping" when "jy" is followed by
-- something other than "u" after normalisation.
function export.jyutping_to_yale(text)
	if type(text) == "table" then text = text.args[1] end

	text = text:gsub("jy([^u])", "j%1")
	-- Use a placeholder so that only commas NOT followed by a space split readings.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local reading = split(text, "隔")

	-- gsub callback: translate the final to its Yale spelling (when it
	-- differs), then apply the tone marking.
	local function yale_tone2(a, b) return yale_tone((yale_final[a] or a), b) end

	for i = 1, #reading do
		reading[i] = reading[i]:gsub("[1-6]%-", "")
		-- Insert a missing space between a tone digit and the next syllable.
		-- The letter is captured and re-emitted; previously it was swallowed,
		-- so "saam1baa2" lost the "b".
		reading[i] = reading[i]:gsub("([1-9])([a-z])", "%1 %2")
		local syllables = split(mw.ustring.lower(reading[i]), " ", true)
		for j = 1, #syllables do
			local syl = syllables[j]

			if syl:find("jy[^u]") then error("wrong usage of 'jy' in Jyutping") end
			-- Fold entering-tone digits back onto 1/3/6.
			syl = syl:gsub("[789]",{["7"] = "1", ["8"] = "3", ["9"] = "6"})
			-- Syllabic nasals have fixed spellings; the {{懸停|…}} placeholders
			-- are turned into HTML spans at the end of the loop body.
			syl = syl:gsub("^m[1-6]$",{["m1"] = "{{懸停|或m̀|m̄}}", ["m2"] = "ḿ", ["m3"] = "m", ["m4"] = "m̀h", ["m5"] = "ḿh", ["m6"] = "mh"})
			syl = syl:gsub("^ng[1-6]$",{["ng1"] = "{{懸停|或ǹg|n̄g}}", ["ng2"] = "ńg", ["ng3"] = "ng", ["ng4"] = "ǹgh", ["ng5"] = "ńgh", ["ng6"] = "ngh"})
			syl = syl:gsub("jy?", "y")
			syl = syl:gsub("[cz]",{["z"] = "j", ["c"] = "ch"})
			syl = syl:gsub("([aeiou][aeiou]?[iumngptk]?[g]?)([1-6])", yale_tone2)
			syl = syl:gsub("(yu[tn]?)([1-6])", yale_tone)
			syl = ugsub(syl, "[āēīōū]",{["ā"] = "{{懸停|或à|ā}}", ["ē"] = "{{懸停|或è|ē}}", ["ī"] = "{{懸停|或ì|ī}}", ["ō"] = "{{懸停|或ò|ō}}", ["ū"] = "{{懸停|或ù|ū}}"})
			syl = syl:gsub("{{懸停|或([^|]+)|([^}]+)}}", '<span style="cursor:help" title="or %1">%2</span>')

			syllables[j] = syl
		end
		reading[i] = table.concat(syllables, " ")
		if reading[i]:find("沒有耶魯拼音") then
			reading[i] = "colloquial sounds not defined"
		end
	end
	return table.concat(reading, ", ")
end

-- Order in which the canton_pinyin_regex rewrites must run. That table is
-- keyed by pattern, and pairs() gives no ordering guarantee, yet the result
-- depends on the order:
--   * the sandhi entering-tone rule only matches tone digits 1-6, so it has
--     to run before the plain rule turns them into 7/8/9;
--   * "eoi" has to run before "eo", which would otherwise consume its prefix.
-- Every entry here must be a key of canton_pinyin_regex.
local canton_pinyin_order = {
	"([ptk][1-6][%-%*])([136])$",
	"([ptk])([136])",
	"eoi",
	"eo",
	"yu",
	"z(h?)",
	"c(h?)",
}

--- Convert a Jyutping-style transcription to Cantonese Pinyin.
--
-- Commas that are not followed by a space separate alternative readings.
--
-- @param text string|table  the transcription, or a frame-like table whose
--                           `args[1]` holds it
-- @return string  the converted readings joined with ", "
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == "table" then text = text.args[1] end

	text = text:gsub("jy([^u])", "j%1")
	-- Use a placeholder so that only commas NOT followed by a space split readings.
	text = text:gsub(",", "隔"):gsub("隔 ", ", ")
	local reading = split(text, "隔")
	for i = 1, #reading do
		for _, regex in ipairs(canton_pinyin_order) do
			reading[i] = ugsub(reading[i], regex, canton_pinyin_regex[regex])
		end
	end
	return table.concat(reading, ", ")
end

--- Convert a Jyutping-style transcription to Guangdong Romanization.
--
-- Commas that are not followed by a space separate alternative readings.
-- A reading in which "é" ends up directly before u, m or b is replaced by
-- "colloquial sounds not defined".
--
-- @param text string|table  the transcription, or a frame-like table whose
--                           `args[1]` holds it
-- @return string  the converted readings joined with ", "
function export.jyutping_to_guangdong(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local palatal_for = { z = 'j', c = 'q', s = 'x' }
	local stop_for = { p = 'b', k = 'g', t = 'd' }
	local bgsub = string.gsub  -- byte-based gsub, as opposed to ugsub

	-- {gsub function, pattern, replacement}, applied strictly in this order.
	local rewrites = {
		{ bgsub, 'yu', 'ü' },
		{ bgsub, 'eoi', 'êü' },
		{ bgsub, 'j', 'y' },
		{ bgsub, '[oe][oe]', 'ê' },
		{ bgsub, 'e', 'é' },
		-- z/c/s become j/q/x before i or ü
		{ ugsub, '([zcs])([iü])', function(sibilant, vowel)
			return palatal_for[sibilant] .. vowel
		end },
		-- ü is written u after j/q/x/y/ê
		{ ugsub, '([jqxyê])ü', '%1u' },
		-- long "aa" becomes "a", short "a" becomes "e"; longer runs are left alone
		{ bgsub, 'a+', { ['aa'] = 'a', ['a'] = 'e' } },
		{ bgsub, '([kg])w', '%1u' },
		{ bgsub, '([ae])u', '%1o' },
		-- final stop directly before a tone digit: p/k/t -> b/g/d
		{ bgsub, '([pkt])(%d)', function(stop, digit)
			return stop_for[stop] .. digit
		end },
		-- keep only the changed tone of an "x-y" pair
		{ bgsub, '%d%-(%d)', '%1' },
	}

	local marked = text:gsub(",", "隔")
	local restored = marked:gsub("隔 ", ", ")
	local reading = split(restored, "隔")

	for idx = 1, #reading do
		local item = reading[idx]
		for _, rule in ipairs(rewrites) do
			item = rule[1](item, rule[2], rule[3])
		end
		if item:find('é[umb]') then
			item = 'colloquial sounds not defined'
		end
		reading[idx] = item
	end

	return table.concat(reading, ", ")
end

--- Split a transcription into readings and collapse tone-change pairs.
--
-- Commas that are not followed by a space separate readings. In every
-- reading an "x-y" pair of tone digits (both 1-6) is reduced to "y".
--
-- @param text string|table  the transcription, or a frame-like table whose
--                           `args[1]` holds it
-- @return table|string  a list of reading strings; when the input contains
--                       "[[" a fixed placeholder string is returned instead
function export.jyutping_format(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local has_wikilink = text:find("%[%[")
	if has_wikilink then
		return "just a lengthy text to ensure it works"
	end

	-- Use a placeholder so that ", " survives while bare commas split.
	local marked = text:gsub(",", "隔")
	local restored = marked:gsub("隔 ", ", ")
	local parts = split(restored, "隔")

	for idx = 1, #parts do
		parts[idx] = (parts[idx]:gsub("[1-6]%-([1-6])", "%1"))
	end

	return parts
end

--- Convert a Hoisanva romanisation string to IPA.
--
-- Commas separate words and single spaces separate syllables. A syllable is
-- initial + final + tone digit (1-5), optionally followed by a tone-change
-- suffix: "*", "-N" or "-N*" (N in 1-5).
--
-- @param text string|table  the transcription, or a frame-like table whose
--                           `args[1]` holds it (accepted for consistency
--                           with the other exported converters)
-- @return string  "/word/, /word/..." with syllables separated by spaces
-- Raises an error naming the offending syllable when it does not parse,
-- uses an unknown initial or final, or combines tone 2 with a bare "*".
function export.hoisanva_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end

	local hsv_initial = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "ᵐb", ["f"] = "f", ["v"] = "v",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "ⁿd", ["l"] = "l",
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ᵑɡ", 
		["z"] = "t͡s", ["c"] = "t͡sʰ",
		["y"] = "ʒ", ["s"] = "s", ["h"] = "h", [""] = ""
	}
	
	local hsv_final = {
		["a"] = "a", ["ai"] = "ai", ["au"] = "au", ["am"] = "am",
		["an"] = "an", ["ang"] = "aŋ", ["ap"] = "ap̚", ["at"] = "at̚",
		["ak"] = "ak̚", 

		["i"] = "i", ["iu"] = "iu", ["im"] = "im", ["in"] = "in", 
		["ip"] = "ip̚", ["it"] = "it̚",

		["ie"] = "iɛ", ["iau"] = "iau", ["iam"] = "iam", ["iang"] = "iaŋ", 
		["iap"] = "iap̚", ["iak"] = "iak̚",

		["u"] = "u", ["ui"] = "ui", ["un"] = "un", ["ut"] = "ut̚", 

		["ei"] = "ei", ["eu"] = "eu", ["em"] = "em", ["en"] = "en", 
		["uung"] = "ɵŋ", ["ep"] = "ep̚", ["et"] = "et̚", ["uuk"] = "ɵk̚", ["uut"] = "ɵt̚",

		["o"] = "ᵘɔ", ["oi"] = "ᵘɔi", ["on"] = "ᵘɔn", ["ong"] = "ɔŋ", 
		["ot"] = "ᵘɔt̚", ["ok"] = "ɔk̚", 
		
		["m"] = "m̩"
	}
	
	-- Indexed by tone number 1-5.
	local hsv_tone = { "³³", "⁵⁵", "²²", "²¹", "³²" }
	
	local gsplit = mw.text.gsplit

	local result = {}
	for word in gsplit(text, ",") do
		local word_result = {}
		for syllable in gsplit(word, " ") do
			-- All four captures are nil when the syllable does not parse.
			local initial, final, tone, tone_ch =
				syllable:match("^([^aeiou]*)([^1-5]*)([1-5])([%*%-]?[1-5]?%*?)$")
			-- A syllable without a vowel (e.g. syllabic "m") is captured
			-- entirely as the initial; treat it as the final instead.
			if final == "" then final, initial = initial, "" end

			local base_tone = tone and hsv_tone[tonumber(tone)]
			if not base_tone or not hsv_initial[initial] or not hsv_final[final]
				or (tone == "2" and tone_ch == "*") then
				error("Syllable '" .. syllable .. "' is not a valid syllable for IPA conversion.")
			end

			-- Build the tone-change part: separator, then the target tone
			-- (the base tone again for a bare "*", or tone N for "-N"), then
			-- a trailing "⁵" when the suffix ends in "*".
			local change = ""
			if tone_ch ~= "" then
				change = "⁻"
				if tone_ch == "*" then
					change = change .. base_tone
				elseif tone_ch:find("^%-[1-5]") then
					change = change .. hsv_tone[tonumber(tone_ch:sub(2, 2))]
				end
				if tone_ch:sub(-1, -1) == "*" then
					change = change .. "⁵"
				end
			end

			word_result[#word_result + 1] =
				hsv_initial[initial] .. hsv_final[final] .. base_tone .. change
		end
		result[#result + 1] = table.concat(word_result, " ")
	end
	return "/" .. table.concat(result, "/, /") .. "/"
end

return export