
-- This module page is experimental.
-- The details of its operation have not yet been fully decided. Do not deploy it widely until the module page is finished.
--
-- This module performs romanisation conversion, IPA conversion, etc. for Jianghuai Mandarin. See {{zh-pron}}.

local export = {}
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsplit = mw.text.gsplit
local lower = mw.ustring.lower
local upper = mw.ustring.upper

-- Maps a romanised initial (lower case) to its IPA transcription.
-- The empty-string key covers syllables that have no initial consonant.
-- An initial that is absent from this table yields nil on lookup.
local initialConv = {
	["b"] = "p", ["d"] = "t", ["g"] = "k",
	["p"] = "pʰ", ["t"] = "tʰ", ["k"] = "kʰ",
	["z"] = "t͡s", ["j"] = "t͡ɕ",
	["c"] = "t͡sʰ", ["q"] = "t͡ɕʰ",
	["m"] = "m", ["l"] = "l",
	["f"] = "f", ["s"] = "s", ["x"] = "ɕ", ["h"] = "x",
	["r"] = "ʐ", ["zh"] = "ʈ͡ʂ",
	["ch"] = "ʈ͡ʂʰ", ["sh"] = "ʂ",
	[""] = "",
}

-- Maps a romanised final (lower case) to its IPA transcription.
-- Keys are grouped by medial: none, i-, u-, ü-.
-- "y" and "r" are the apical vowels; "er" is the rhotic vowel.
-- NOTE(review): "ao" maps to "ɔ" while "iao" maps to "iau" -- confirm that
-- this asymmetry is intended.
local finalConv = {
	["y"] = "ɿ", ["r"] = "ʅ", ["er"] = "ɚ",
	["a"] = "a", ["o"] = "o", ["e"] = "e",
	["ä"] = "ɛ", ["ei"] = "əɪ", ["ao"] = "ɔ", ["ou"] = "əɯ",
	["en"] = "ən", ["än"] = "ẽ", ["ang"] = "ã", ["ong"] = "oŋ",

	["i"] = "i", ["ia"] = "ia", ["ie"] = "ie",
	["iä"] = "iɛ", ["iao"] = "iau", ["iou"] = "iəɯ",
	["iän"] = "iẽ", ["in"] = "in", ["iang"] = "iã", ["iong"] = "ioŋ",

	["u"] = "u", ["ua"] = "ua", ["uä"] = "uɛ",
	["uei"] = "uəɪ", ["uen"] = "uən", ["uang"] = "uã",

	["ü"] = "y", ["üe"] = "ye",
	["üän"] = "yẽ", ["üin"] = "yin",
}

-- Maps a single tone character to its superscript form: the digits 1-5 become
-- superscript tone-contour digits and "-" becomes a superscript minus.
-- Characters that are not keys here are left unchanged by the caller's gsub.
local toneConv = {
	["1"] = "³¹", ["2"] = "¹³", ["3"] = "²¹²", ["4"] = "⁴⁴", ["5"] = "⁵",
	["-"] = "⁻",
}

--- Normalisation hook applied to every syllable before conversion.
-- Currently an identity function: both arguments are returned unchanged.
-- @param initial romanised initial of the syllable
-- @param final romanised final of the syllable
-- @return initial, final (unchanged)
local function fix(initial, final)
	return initial, final
end

--- Reject spellings that are not valid Nankinese Pinyin.
-- Raises an error with an explanatory message for the first rule that the
-- syllable violates; returns nothing when the syllable passes every rule.
-- @param initial romanised initial (lower case)
-- @param final romanised final (lower case)
-- @param tone tone string; accepted for call-site symmetry but not inspected
local function warn(initial, final, tone)
	local palatal = initial == "j" or initial == "q" or initial == "x"
	local retroflex = initial == "zh" or initial == "ch" or initial == "sh" or initial == "r"

	if palatal and final == "u" then
		error("Syllables in Nankinese Pinyin do not include ju, qu, xu, but include jü, qü, xü.")
	end
	if palatal and (final == "iao" or final == "io" or final == "iang" or final == "iä" or final == "iän" or final == "ia" or final == "ie" or final == "iong") then
		error("Syllables in Nankinese Pinyin do not include ji+vowel, qi+vowel, xi+vowel. You can try j-, q-, x- instead ji-, qi-, xi- ")
	end
	if palatal and (final == "iou" or final == "iu") then
		error("Nankinese Pinyin does not use jiou, qiou ,xiou ,jiu, qiu ,xiu, but uses jou, qou, xou instead.")
	end
	if retroflex and final == "i" then
		error("Nankinese Pinyin does not use zhi, chi, shi ,ri, but uses zhr, chr, shr, r instead.")
	end
	if final == "uo" then
		error("Nankinese Pinyin does not use -uo, but uses -o instead.")
	end
end

--- Convert Nankinese Pinyin text to another representation.
-- `text` holds one or more readings separated by "/"; each reading is a run
-- of syllables written as letters followed by tone characters (digits or "-").
-- @param text the romanised string, or a table whose `args[1]` / `args[2]`
--   supply `text` and `scheme`
-- @param scheme 'IPA' or 'NKG'
-- @return for 'IPA': the readings wrapped in slashes and joined by "/, /";
--   for 'NKG': the readings joined by " / "; `false` if an erhua syllable is
--   met while converting to 'NKG'
-- Raises an error for an unparseable syllable, for an initial or final that
-- has no IPA mapping, for spellings rejected by `warn`, and for any other
-- value of `scheme` once a syllable is being converted.
function export.convert(text, scheme)
	if type(text) == "table" then
		text, scheme = text.args[1], text.args[2]
	end

	local result = {}
	for word in gsplit(text, '/') do
		local converted = {}

		-- Leading non-letter material of the reading; re-attached for NKG only.
		local extra2 = match(word, '^[^A-ZÄÜa-zäü]*')
		for syllable in gmatch(word, '[A-ZÄÜa-zäü]+[%d%-]+[^A-ZÄÜa-zäü]*') do
			local initial, final, erhua, tone, extra = match(syllable, '^([BDGPTKZJCQLMNFSXHVRWUIÜbdgptkzjlcqmnfsxhvrw]?h?)([AEOÄÜRIUYaiueoäüyrng]+)(r?)([%d%-]+)([^A-ZÄÜa-zäü]*)$')
			if not initial then
				-- Without this guard the concatenation below fails with an
				-- opaque "attempt to concatenate a nil value" error.
				error('Invalid syllable: ' .. syllable)
			end
			local caps = false

			-- NOTE(review): only ASCII capitals are detected here; a syllable
			-- whose sole capital is Ä or Ü is not lower-cased -- confirm.
			if find(initial .. final, '[A-Z]') then
				caps = true
				initial, final = lower(initial), lower(final)
			end

			-- The final's character class contains "r" and is greedy, so the
			-- (r?) capture above is always empty. Recover the erhua suffix
			-- here: a final that is not itself a known final but becomes one
			-- when a trailing "r" is removed is treated as final + erhua.
			if erhua == '' and not finalConv[final] then
				local stem = match(final, '^(.+)r$')
				if stem and finalConv[stem] then
					final, erhua = stem, 'r'
				end
			end

			warn(initial, final, tone)

			initial, final = fix(initial, final)
			if final == 'e' and erhua == 'r' then
				final, erhua = 'er', ''
			end

			if scheme == 'IPA' then
				local ipaInitial = initialConv[initial]
				local ipaFinal = finalConv[final]
				if not ipaInitial then
					error('Unknown initial "' .. initial .. '" in syllable: ' .. syllable)
				end
				if not ipaFinal then
					error('Unknown final "' .. final .. '" in syllable: ' .. syllable)
				end
				-- Characters without a toneConv entry are left as they are.
				tone = gsub(tone, '.', function(char) return toneConv[char] end)

				if erhua == 'r' then
					-- Classify on the romanised final: after conversion "ü"
					-- becomes "y" and "ong" becomes "oŋ", so testing the IPA
					-- form would never reach the ü / ong branches.
					if find(final, '^ü') then -- rounded front medial (ü-)
						ipaFinal = 'uɚ'
					elseif find(final, '^i') then -- i- medial
						ipaFinal = 'ɚ'
					elseif find(final, '^u') then -- u- medial
						ipaFinal = 'uɚ'
					elseif final == 'o' or final == 'ong' then
						ipaFinal = 'uɚ'
					else -- no medial
						ipaFinal = 'ɚ'
					end
				end

				table.insert(converted, ipaInitial .. ipaFinal .. tone)
			elseif scheme == 'NKG' then
				-- NOTE(review): initialConv_swz / finalConv_swz are not defined
				-- in the visible part of this module; unless they are provided
				-- elsewhere this branch fails when indexing them.
				initial = initialConv_swz[initial] or initial
				final = finalConv_swz[final] or final

				tone = gsub(tone, '(%d)%-(%d)', '%2')

				-- Erhua handling is undecided for this scheme, so disable output.
				if erhua == 'r' then return false end

				syllable = initial .. final
				if caps then syllable = gsub(syllable, '^(.)', upper) end

				-- "@" marks a syllable boundary; it is resolved after joining.
				table.insert(converted, '@' .. syllable .. extra)
			else
				error('Convert to what representation?')
			end
		end

		if scheme == 'IPA' then
			table.insert(result, table.concat(converted, ' '))
		elseif scheme == 'NKG' then
			local joined = table.concat(converted, '')
			-- Boundary adjustments between a letter and a following syllable
			-- that starts with u / i / y, then drop the remaining markers.
			joined = gsub(joined, '([a-z])@(u)', '%1w')
			joined = gsub(joined, '([a-z])@(i)', '%1j')
			joined = gsub(joined, '([ng])@(y)', '%1j')
			joined = gsub(joined, '@un', 'wen')
			joined = gsub(joined, '@', '')
			table.insert(result, extra2 .. joined)
		end
	end

	if scheme == 'IPA' then
		return '/' .. table.concat(result, '/, /') .. '/'
	else
		return table.concat(result, ' / ')
	end
end

return export