-- Module:mh-pronunc/sandbox


-- This module is primarily maintained at:
-- https://en.wiktionary.org/wiki/Module:mh-pronunc
-- Please direct all technical queries and contributions there.
-- The version of this script on Wikipedia is only a mirror.

-- Table of public entry points; filled in at the bottom of the file
-- and returned to whoever loads this module.
local export = {}

-- One-character strings, each holding a single combining diacritic,
-- named after the phonetic modification it marks.
-- NOTE(review): in the visible source only ASYLLABIC, CEDILLA, CENTRAL,
-- LESSROUND2 and MACRON are referenced; the others appear unused here.
local ASYLLABIC = "̯"
local BREVE = "̆"
local BREVE2 = "͝"
local CEDILLA = "̧"
local CENTRAL = "̈"
local DEVOICE = "̥"
local DEVOICE2 = "̊"
local LESSROUND = "̜"
local LESSROUND2 = "͑"
local MACRON = "̄"
local MOREROUND = "̹"
local MOREROUND2 = "͗"
local SYLLABIC = "̩"
local TIE = "͡"
local TIE2 = "͜"

-- Classification of a two-consonant cluster, stored as the leaf values of
-- the `clusterTypes` lookup table and tested in toPhoneticRemainder():
--   EPENTH_CLUSTER: an epenthetic vowel is inserted between the consonants.
--   ASSIM_CLUSTER:  the first consonant takes the second one's primary
--                   articulation.
--   STABLE_CLUSTER: neither of the above; only secondary articulation
--                   is assimilated.
local EPENTH_CLUSTER = 0
local ASSIM_CLUSTER  = 1
local STABLE_CLUSTER = 2

-- Forward-declare functions.
-- Each name is declared as a local here and assigned later with
-- `name = function(...)`, so the functions can call one another
-- regardless of the order in which they are defined below.
local addUnique
local assign
local fastTrim
local lerpF2
local mergedMidVowelsMap
local needClusterTypes
local needPhoneticMap
local needVowelCharts
local parse
local parseBoolean
local reverseString
local splitTokens
local string_gsub2
local string_gsubx
local toBender
local toMOD
local toPhonemic
local toPhonetic
local toPhoneticDialect
local toPhoneticRemainder

-- Forward-declare lookup tables.
-- These start out nil and are built lazily, on first use, by the
-- function that needs them (for example needVowelCharts() fills in
-- toF1, fromF1, toF2, fromF2 and fromF2Conson).
local benderMaps
local clusterTypes
local fromF1
local fromF2
local fromF2Conson
local parseC_CH_CWmap
local parsePseudoConsonMap
local parseRemainingMap
local phonemicMap
local phoneticMap
local toF1
local toF2
local toMODmap
local voicedPrimaries



-- Treats `seq` as an insertion-ordered set: `value` is appended to the end
-- only when no element already stored in `seq` compares equal to it.
-- Returns nothing.
addUnique = function(seq, value)
	local alreadyPresent = false
	for _, existing in pairs(seq) do
		if existing == value then
			alreadyPresent = true
			break
		end
	end
	if not alreadyPresent then
		seq[#seq + 1] = value
	end
end



-- Intended to work the same as JavaScript's Object.assign() function.
-- Copies every key/value pair of each table argument after `target` into
-- `target`, then returns `target`. Arguments that are not tables
-- (including nil) are skipped.
--
-- The sources are visited strictly left to right so that, when the same key
-- occurs in several sources, the rightmost one wins. The argument list is
-- walked with select() rather than pairs({...}) because pairs() makes no
-- promise about traversal order.
assign = function(target, ...)
	for index = 1, select("#", ...) do
		-- select(index, ...) returns the index-th argument and everything
		-- after it; the single local keeps only the first of those.
		local source = select(index, ...)
		if type(source) == "table" then
			for key, value in pairs(source) do
				target[key] = value
			end
		end
	end
	return target
end



-- Returns `text` with leading and trailing whitespace (%s) removed,
-- using a single byte-oriented pattern match.
fastTrim = function(text)
	local trimmed = string.match(text, "^%s*(.-)%s*$")
	return trimmed
end



-- Returns the F2 digit string lying halfway between the F2 values of two
-- secondary articulations (or F2 digits), as looked up in the vowel charts.
lerpF2 = function(secondaryL, secondaryR)
	needVowelCharts()
	local leftF2 = toF2[secondaryL]
	local rightF2 = toF2[secondaryR]
	local midpoint = 0.5 * (leftF2 + rightF2)
	return fromF2[midpoint]
end



-- Lazily builds `clusterTypes`, a three-level lookup indexed as
-- clusterTypes[primaryL][primaryR][secondaryR] whose leaves are one of
-- EPENTH_CLUSTER, ASSIM_CLUSTER or STABLE_CLUSTER.
-- Does nothing when the table has already been built.
needClusterTypes = function()

	if clusterTypes then
		return
	end

	-- Builds a leaf table giving the same cluster type to every
	-- secondary articulation of the second consonant.
	local function uniform(clusterType)
		return {
			["j"] = clusterType,
			["G"] = clusterType,
			["w"] = clusterType
		}
	end

	local EPENTH = uniform(EPENTH_CLUSTER)
	local ASSIM  = uniform(ASSIM_CLUSTER)
	local STABLE = uniform(STABLE_CLUSTER)

	-- Default row: every second consonant triggers epenthesis.
	local EMPTY = {}
	for _, primary in ipairs({
		"p", "t", "k", "m", "n", "N", "r", "l", "h", "y"
	}) do
		EMPTY[primary] = EPENTH
	end

	-- Copy of the default row with some second consonants overridden.
	local function row(overrides)
		return assign({}, EMPTY, overrides)
	end

	clusterTypes = {
		["p"] = row({
			["p"] = STABLE, -- /pp/
			["m"] = ASSIM   -- /pm/ becomes [mm]
		}),
		["t"] = row({
			["t"] = STABLE  -- /tt/
		}),
		["k"] = row({
			["k"] = STABLE, -- /kk/
			["N"] = ASSIM   -- /kŋ/ becomes [ŋŋ]
		}),
		["m"] = row({
			["p"] = STABLE, -- /mp/
			["m"] = STABLE  -- /mm/
		}),
		["n"] = row({
			["t"] = STABLE, -- /nt/
			["n"] = STABLE, -- /nn/
			["r"] = STABLE, -- /nr/
			["l"] = STABLE  -- /nl/
		}),
		["N"] = row({
			["k"] = STABLE, -- /ŋk/
			["N"] = STABLE  -- /ŋŋ/
		}),
		["r"] = row({
			["n"] = ASSIM,  -- /rn/ becomes [nn]
			["r"] = STABLE, -- /rr/
			["l"] = STABLE  -- /rl/
		}),
		["l"] = row({
			-- Stable except before palatalized {t}.
			["t"] = assign({}, STABLE, {
				["j"] = EPENTH_CLUSTER -- /ltʲ/
			}),
			["n"] = ASSIM,  -- /ln/ becomes [nn]
			["r"] = STABLE, -- /lr/
			["l"] = STABLE  -- /ll/
		}),
		-- Semiconsonants as first member use the default row unchanged.
		["h"] = EMPTY
	}

end



-- Lazily builds `phoneticMap`, the table that the final gsub in
-- toPhoneticRemainder() uses to turn pseudo-X-SAMPA tokens into IPA.
-- Does nothing when the table has already been built.
-- Several alternatives are kept in the code but switched off with
-- `false and ...` / `if false then`; they are left in place untouched.
needPhoneticMap = function()

	if phoneticMap then
		return
	end

	-- fromF1 and fromF2 are needed by the vowel loop further down.
	needVowelCharts()

	-- Base inventory: primary consonants, surfaced glides ("H" prefix),
	-- secondary articulations, vowels keyed as F1 letter plus F2 digit,
	-- and the marker characters used by toPhoneticRemainder().
	local map = {
		["p"] = "p",
		["b"] = "b",
		["t"] = "t",
		["d"] = "d",
		["k"] = "k",
		["g"] = "ɡ",
		["m"] = "m",
		["n"] = "n",
		["N"] = "ŋ",
		["r"] = "r",
		["l"] = "l",
		["Hj"] = "j",
		["HG"] = "ɰ",
		["Hw"] = "w",
		["_"] = "‿",
		["j"] = "ʲ",
		["G"] = "ˠ",
		["w"] = "ʷ",
		["a1"] = "æ",
		["E1"] = "ɛ",
		["e1"] = "e",
		["i1"] = "i",
		["a2"] = "a",
		["E2"] = "ɜ",
		["e2"] = "ɘ",
		["i2"] = "ɨ",
		["a3"] = "ɑ",
		["E3"] = "ʌ",
		["e3"] = "ɤ",
		["i3"] = "ɯ",
		["a5"] = "ɒ",
		["E5"] = "ɔ",
		["e5"] = "o",
		["i5"] = "u",
		["^"] = ASYLLABIC,
		["@"] = "",
		["("] = "(",
		[")"] = ")",
		[":"] = "ː",
		["="] = "",
		["\""] = "ˈ",
		["%"] = "ˌ",
		[","] = "",
		["\\"] = ""
	}
	
	-- `false and {...}` is always false, so the second table is the one
	-- merged in: the uppercase (partially voiced) obstruents P/T/K get the
	-- same symbols as plain p/t/k. The first table is a disabled alternative.
	assign(map, false and {
		["P"] = "b̥",
		["T"] = "d̥",
		["K"] = "ɡ̊"
	} or {
		["P"] = map["p"],
		["T"] = map["t"],
		["K"] = map["k"]
	})
	
	-- Disabled: would drop the velarization mark on velar consonants.
	if false then
		for primary in mw.text.gsplit("kKgN", "") do
			map[primary.."G"] = map[primary]
		end
	end
	
	-- "Hj" is already set above, so the first line keeps it as is;
	-- "i1^" and "yj" are not set yet and therefore inherit that value.
	map["Hj"] = map["Hj"] or map["i1^"] or (map["i1"]..map["^"])
	map["i1^"] = map["i1^"] or map["Hj"]
	map["yj"] = map["yj"] or map["i1^"]
	
	-- Back unrounded semivowels: "i3^" first defaults to the "HG" symbol,
	-- then the enabled block below assigns explicit symbols per F1.
	map["i3^"] = map["i3^"] or map["HG"]
	if true then
		assign(map, {
			["i3^"] = "ɰ",
			["e3^"] = "ʁ",
			["E3^"] = "ʁ",
			["a3^"] = "ʕ"
		})
	end
	
	-- Rounded semivowels of every F1 default to the "Hw" symbol.
	-- An empty split pattern makes gsplit yield one character at a time.
	if true then
		for f1 in mw.text.gsplit("aEei", "") do
			local key = f1.."5^"
			map[key] = map[key] or map["Hw"]
		end
	end
	
	-- Every primary (including the liaison mark and whitespace) combined
	-- with each secondary articulation; primaries without an entry of
	-- their own are kept literally.
	for primary in mw.text.gsplit("pPbtTdkKgmnNrl_ \t\n", "") do
		for secondary in mw.text.gsplit("jGw", "") do
			local key = primary..secondary
			map[key] = map[key] or ((map[primary] or primary)..map[secondary])
		end
	end

	-- Fill in the rest of the vowel grid for each F1 (1 to 4):
	-- F2 "2" falls back to the F2 "1" symbol plus CENTRAL,
	-- F2 "4" falls back to the F2 "5" symbol plus LESSROUND2,
	-- and every vowel gets "=", "@" and "^" suffixed variants.
	for f1 = 1, 4 do
		local vowelF1 = fromF1[f1]
		local vowel = vowelF1.."2"
		map[vowel] = map[vowel] or (map[vowelF1.."1"]..CENTRAL)
		vowel = vowelF1.."4"
		map[vowel] = map[vowel] or (map[vowelF1.."5"]..LESSROUND2)
		for f2 = 1, 5 do
			vowel = vowelF1..fromF2[f2]
			local semi = vowel.."="
			map[semi] = map[semi] or (map[vowel]..map["="])
			semi = vowel.."@"
			map[semi] = map[semi] or (map[vowel]..map["@"])
			semi = vowel.."^"
			map[semi] = map[semi] or (map[vowel]..map["^"])
		end
	end

	-- Publish only once the table is complete.
	phoneticMap = map

end



-- Lazily builds the vowel-chart conversion tables:
--   toF1 / fromF1       F1 letter ("a", "E", "e", "i") <-> number 1 to 4;
--   toF2 / fromF2       F2 digit string, number 1 to 5, or secondary
--                       articulation ("j", "G", "w") -> number / digit string;
--   fromF2Conson        F2 value 1, 3 or 5 in any spelling -> "j", "G", "w".
-- Every table also accepts its own output type as a key.
-- Does nothing when the tables have already been built.
needVowelCharts = function()

	if toF1 then
		return
	end

	toF1, fromF1 = {}, {}
	for f1, letter in ipairs({ "a", "E", "e", "i" }) do
		toF1[letter], toF1[f1] = f1, f1
		fromF1[f1], fromF1[letter] = letter, letter
	end

	toF2, fromF2, fromF2Conson = {}, {}, {}
	for f2 = 1, 5 do
		local digit = tostring(f2)
		toF2[f2], toF2[digit] = f2, f2
		fromF2[f2], fromF2[digit] = digit, digit
	end

	-- Only F2 values 1, 3 and 5 correspond to a secondary articulation.
	for f2, conson in pairs({ [1] = "j", [3] = "G", [5] = "w" }) do
		local digit = fromF2[f2]
		toF2[conson] = f2
		fromF2[conson] = digit
		fromF2Conson[f2] = conson
		fromF2Conson[digit] = conson
		fromF2Conson[conson] = conson
	end

end



-- Parses a comma-separated list of spellings written in the module's
-- input notation and converts each one to the internal pseudo-X-SAMPA form.
--
-- code: string; whitespace runs are collapsed and the text is lowercased
--       before parsing. Items are separated by commas; empty items are
--       skipped.
-- Returns a sequence of the converted items, without duplicates.
-- Raises an error (via error()) when an item contains unsupported
-- characters, misplaced underscores, a "g" outside "ng", or violates the
-- phonotactic checks below. Every error message ends with the offending
-- code after ": " (the first one names the unsupported characters instead).
parse = function(code)

	local outSeq = {}
	code = mw.ustring.gsub(code, "%s+", " ")
	code = string.lower(code)
	for text in mw.text.gsplit(code, " *,[ ,]*") do

		text = fastTrim(text)
		if text ~= "" then

			-- Whatever is left after deleting the allowed characters
			-- is unsupported.
			local temp = string.gsub(text, "[abdeghijklmnprtwy_&'%- ]", "")
			if temp ~= "" then
				error("'"..code.."' contains unsupported characters: "..temp)
			end

			-- Recognize "y_", "h_", "w_", "_y", "_h", "_w" as pseudoconsonants.
			-- They are rewritten with a leading "0", which the multigraph
			-- table below turns into "_" plus a secondary articulation.
			parsePseudoConsonMap = parsePseudoConsonMap or {
				["y"] = "0",
				["h"] = "0h",
				["w"] = "0w"
			}
			text = string.gsub(text, "_*([hwy])_+", parsePseudoConsonMap)
			text = string.gsub(text, "_+([hwy])", parsePseudoConsonMap)
			if string.find(text, "_") then
				error("contains misplaced underscores: "..code)
			end

			-- a plain {i} protected from dialect-specific reflexes
			text = string.gsub(text, "'i", "I")

			-- "yi'y" and "'yiy" sequences
			-- Returning nothing from the callback leaves a plain "yiy" as is.
			text = string.gsub(text, "('?)yi('*)y", function(aposA, aposB)
				if aposA ~= "" then
					-- "dwelling upon" i
					return "Z"
				elseif aposB ~= "" then
					-- "passing over lightly" i
					return "z"
				end
			end)

			-- Convert multigraphs to pseudo-X-SAMPA format.
			-- "J" and "W" are temporary stand-ins for "j" and "w" so that the
			-- single-character pass below does not reinterpret them.
			parseC_CH_CWmap = parseC_CH_CWmap or {
				["k"]   = "kG",
				["kh"]  = "kGh", -- N\A
				["kw"]  = "kW",
				["l"]   = "lJ",
				["lh"]  = "lG",
				["lw"]  = "lW",
				["m"]   = "mJ",
				["mh"]  = "mG",
				["mw"]  = "mJw", -- N\A
				["n"]   = "nJ",
				["nh"]  = "nG",
				["nw"]  = "nW",
				["ng"]  = "NG",
				["ngh"] = "NGh", -- N\A
				["ngw"] = "NW",
				["r"]   = "rG",
				["rh"]  = "rGh", -- N\A
				["rw"]  = "rW",
				["0"]   = "_J",
				["0h"]  = "_G",
				["0w"]  = "_W"
			}
			text = string.gsub(text, "[klmnr0]g?[hw]?", parseC_CH_CWmap)
			-- Matches absent from the table (such as "kg") are left untouched,
			-- so any surviving "g" was not part of "ng".
			if string.find(text, "g") then
				error("contains g that is not part of ng: "..code)
			end

			-- Convert remaining sequences to pseudo-X-SAMPA format.
			parseRemainingMap = parseRemainingMap or {
				["b"] = "pG",
				["d"] = "rj",
				["e"] = "E",
				["&"] = "e",
				["h"] = "hG",
				["j"] = "tj",
				["J"] = "j",
				["p"] = "pj",
				["t"] = "tG",
				["w"] = "hw",
				["W"] = "w",
				["y"] = "hj",
				["z"] = "yj",
				["Z"] = "Yj",
				["'"] = ""
			}
			text = string.gsub(text, ".", parseRemainingMap)

			-- Enforce CVC, CVCVC, CVCCVC, etc. phonotactics,
			-- but allow VC, CV at affix boundaries
			-- where a vowel may link to another morpheme's consonant.
			-- The checks run on a copy with spaces and hyphens removed.
			temp = string.gsub(text, "[%s%-]+", "")
			if	string.find(temp, "_..[jGw]") or
				string.find(temp, ".[jGw]_.")
			then
				error("pseudoconsonants may not neighbor a consonant: "..code)
			end
			if string.find(temp, "[aEeIi]_.[aEeIi]") then
				error(
					"pseudoconsonants may only be at the beginning or end: "..
					code
				)
			end
			if string.find(temp, "[aEeIi][aEeIi]") then
				error("vowels must be separated by a consonant: "..code)
			end
			if string.find(temp, ".[jGw].[jGw]$") then
				error("may not end with a consonant cluster: "..code)
			end
			-- Validation only: the callback never returns a replacement and
			-- the result of gsub is discarded.
			string.gsub(" "..temp, "[ jGw](.[jGw])(.[jGw][ptkmnNrlhyYjGw]*)",
				function(consonX, consonY)
					if consonX ~= consonY then
						error(
							"may not begin with a consonant cluster "..
							"unless it is a geminate: "..code
						)
					end
				end
			)

			if text ~= "" then
				addUnique(outSeq, text)
			end

		end

	end

	return outSeq

end



-- Interprets a template-style argument as a boolean.
-- Non-string values are false. For strings, everything except ASCII letters
-- and digits is discarded first; the result is false when what remains is
-- empty, "0", or "false" in any letter case, and true otherwise.
parseBoolean = function(text)
	if type(text) ~= "string" then
		return false
	end
	local word = string.gsub(text, "[^0-9A-Za-z]", "")
	word = string.lower(word)
	return not (word == "" or word == "0" or word == "false")
end



-- Returns `text` with its tokens in reverse order, where the tokens are
-- whatever splitTokens() yields with its default pattern.
reverseString = function(text)
	local tokens = splitTokens(text)
	local count = #tokens
	local reversed = {}
	for position = 1, count do
		reversed[position] = tokens[count - position + 1]
	end
	return table.concat(reversed, "")
end



-- Splits `text` into tokens and stores them in a sequence.
--
-- text:    string to split.
-- pattern: optional Lua pattern matched repeatedly with string.gmatch;
--          the default matches one lead byte followed by any number of
--          bytes in the range 128 to 191 (one UTF-8 encoded character
--          for well-formed input).
-- chars:   optional table to reuse; tokens overwrite it from index 1.
-- shorten: optional flag selecting how leftover entries of a reused
--          `chars` are handled when it was longer than the token count.
-- Returns `chars` (or a new table when none was given).
splitTokens = function(text, pattern, chars, shorten)
	chars = chars or {}
	-- Measure the reused table once, while it is still a proper sequence.
	-- The length operator is unspecified once trailing entries are being
	-- set to nil, so it must not be re-evaluated during the clean-up.
	local oldLength = #chars
	local index = 1
	for ch in string.gmatch(
		text, pattern or "[%z\1-\127\194-\244][\128-\191]*"
	) do
		chars[index] = ch
		index = index + 1
	end
	if index <= oldLength then
		if shorten then
			-- NOTE(review): removes only the first leftover entry, exactly
			-- as before; confirm whether that is the intended meaning of
			-- `shorten`.
			table.remove(chars, index)
		else
			-- Clear every leftover entry, from the end downwards.
			for stale = oldLength, index, -1 do
				chars[stale] = nil
			end
		end
	end
	return chars
end



-- Applies string.gsub(text, pattern, subst) and, when that changed
-- anything, applies it once more to the result. Two passes let overlapping
-- matches that share a boundary character both be rewritten.
-- Returns only the resulting string.
string_gsub2 = function(text, pattern, subst)
	local firstPass = string.gsub(text, pattern, subst)
	if firstPass == text then
		-- Nothing matched, so a second pass could not match either.
		return firstPass
	end
	local secondPass = string.gsub(firstPass, pattern, subst)
	return secondPass
end



-- Applies string.gsub(text, pattern, subst) over and over until a pass
-- leaves the string unchanged, then returns only that string.
string_gsubx = function(text, pattern, subst)
	while true do
		local replaced = string.gsub(text, pattern, subst)
		if replaced == text then
			return replaced
		end
		text = replaced
	end
end



-- Converts parsed pseudo-X-SAMPA items to a Bender-style orthography.
--
-- inSeq: table of strings as produced by parse().
-- args:  optional table; args.version selects the orthography
--        (case-insensitive):
--   "1968" is from "Marshallese Phonology" (1968 by Byron W. Bender).
--   "med" is from the Marshallese-English Dictionary (1976).
--   "mod" is from the Marshallese-English Online Dictionary.
--   "default" is the same as "mod" but with cedillas.
--        Any other value, or none, selects "default".
-- Returns a sequence of the converted items, without duplicates.
toBender = function(inSeq, args)

	-- Build the four conversion tables on first use. Each later table
	-- starts as a copy of the previous one with some entries replaced.
	if not benderMaps then
		local map1968 = {
			["pj"] = "p", ["pG"] = "b",
			["tj"] = "j", ["tG"] = "t",
			              ["kG"] = "k", ["kw"] = "q",
			["mj"] = "m", ["mG"] = "ṁ",
			["nj"] = "n", ["nG"] = "ṅ", ["nw"] = "n̈",
			              ["NG"] = "g", ["Nw"] = "g̈",
			["rj"] = "d", ["rG"] = "r", ["rw"] = "r̈",
			["lj"] = "l", ["lG"] = "ł", ["lw"] = "l̈",
			["yj"] = "yi'y",
			["Yj"] = "'yiy",
			["hj"] = "y", ["hG"] = "h", ["hw"] = "w",
			["_j"] = "",  ["_G"] = "",  ["_w"] = "",
			["a"]  = "a",
			["E"]  = "e",
			["e"]  = "&",
			["i"]  = "i",
			["I"]  = "i"
		}
		local medOverrides = {
			["mG"] = "m̧",
			["nG"] = "ņ",
			["nw"] = "ņ°",
			["Nw"] = "g°",
			["rw"] = "r°",
			["lG"] = "ļ",
			["lw"] = "ļ°",
			["e"]  = "ȩ"
		}
		local modOverrides = {
			["kw"] = "kʷ",
			["mG"] = "ṃ",
			["nG"] = "ṇ",
			["nw"] = "ṇʷ",
			["Nw"] = "gʷ",
			["rw"] = "rʷ",
			["lG"] = "ḷ",
			["lw"] = "ḷʷ",
			["e"]  = "ẹ"
		}
		local defaultOverrides = {
			["mG"] = "m̧",
			["nG"] = "ņ",
			["nw"] = "ņʷ",
			["lG"] = "ļ",
			["lw"] = "ļʷ",
			["e"]  = "ȩ"
		}
		local mapMED = assign({}, map1968, medOverrides)
		local mapMOD = assign({}, mapMED, modOverrides)
		local mapDefault = assign({}, mapMOD, defaultOverrides)
		benderMaps = {
			["1968"]    = map1968,
			["med"]     = mapMED,
			["mod"]     = mapMOD,
			["default"] = mapDefault
		}
	end

	-- Pick the table for the requested version.
	local version = args and args.version
	local versionKey = ""
	if type(version) == "string" then
		versionKey = string.lower(version)
	end
	local map = benderMaps[versionKey] or benderMaps["default"]

	-- Each token is one character optionally followed by a secondary
	-- articulation; tokens without a table entry are left unchanged.
	local outSeq = {}
	for _, text in pairs(inSeq) do
		local spelled = string.gsub(text, ".[jGw]?", map)
		addUnique(outSeq, spelled)
	end
	return outSeq

end



-- Rewrites orthographic text so that the letters listed in the table below
-- (cedilla and macron letters) are replaced by their dot-below / tilde
-- counterparts. All other text is returned unchanged.
toMOD = function(text)
	if not toMODmap then
		toMODmap = {
			["Ȩ"] = "Ẹ", ["ȩ"] = "ẹ",
			["Ļ"] = "Ḷ", ["ļ"] = "ḷ",
			["M̧"] = "Ṃ", ["m̧"] = "ṃ",
			["Ņ"] = "Ṇ", ["ņ"] = "ṇ",
			["N̄"] = "Ñ", ["n̄"] = "ñ",
			["O̧"] = "Ọ", ["o̧"] = "ọ"
		}
	end
	-- One character optionally followed by a combining cedilla or macron.
	local letterPattern = ".["..CEDILLA..MACRON.."]?"
	local converted = mw.ustring.gsub(text, letterPattern, toMODmap)
	return converted
end



-- Converts parsed pseudo-X-SAMPA items to phonemic IPA.
--
-- inSeq: table of strings as produced by parse().
-- Returns a sequence of the converted items, without duplicates.
toPhonemic = function(inSeq)

	-- Build the conversion table on first use.
	if not phonemicMap then
		local map = {
			["pj"] = "pʲ", ["pG"] = "pˠ",
			["tj"] = "tʲ", ["tG"] = "tˠ",
			               ["kG"] = "kˠ", ["kw"] = "kʷ",
			["mj"] = "mʲ", ["mG"] = "mˠ",
			["nj"] = "nʲ", ["nG"] = "nˠ", ["nw"] = "nʷ",
			               ["NG"] = "ŋˠ", ["Nw"] = "ŋʷ",
			["rj"] = "rʲ", ["rG"] = "rˠ", ["rw"] = "rʷ",
			["lj"] = "lʲ", ["lG"] = "lˠ", ["lw"] = "lʷ",
			["hj"] = "j",  ["hG"] = "ɰ",  ["hw"] = "w",
			["_j"] = "",   ["_G"] = "",   ["_w"] = "",
			["a"]  = "æ",
			["E"]  = "ɛ",
			["e"]  = "e",
			["i"]  = "i",
			["I"]  = "i"
		}
		phonemicMap = map
		-- Disabled alternative: central symbols for the vowels.
		if false then
			assign(map, {
				["a"] = "ɐ",
				["E"] = "ə",
				["e"] = "ɘ",
				["i"] = "ɨ",
				["I"] = "ɨ"
			})
		end
		-- The two {yiy} variants are spelled out from the glide and vowel
		-- symbols chosen above.
		local glide, high = map.hj, map.i
		map["yj"] = glide..high..ASYLLABIC..glide
		map["Yj"] = glide..high.."ː"..glide
	end

	-- Each token is one character optionally followed by a secondary
	-- articulation; tokens without a table entry are left unchanged.
	local outSeq = {}
	for _, text in pairs(inSeq) do
		local ipa = string.gsub(text, ".[jGw]?", phonemicMap)
		addUnique(outSeq, ipa)
	end
	return outSeq

end



-- Converts parsed pseudo-X-SAMPA items to phonetic IPA.
--
-- inSeq: table of strings as produced by parse().
-- args:  optional table of options:
--   dialect   "ralik" (or "rālik") for the Rālik Chain (western dialect),
--             "ratak" for the Ratak Chain (eastern dialect); for any other
--             value both dialect reflexes are produced where they differ.
--   enunciate boolean-like; break words at consonant cluster boundaries
--             and pronounce the fragments individually, without cluster
--             assimilation or epenthetic vowels.
--   liaison   boolean-like; display liaison joiners for spaces or hyphens
--             in the input that are not consonant clusters.
--   nohints   boolean-like; do not display pseudoconsonant hints.
--   voice     boolean-like or empty; stored in the configuration passed on.
--             NOTE(review): nothing in this file reads config.voice.
-- Returns a sequence of pronunciations, without duplicates.
toPhonetic = function(inSeq, args)

	local dialect = ""
	if args and args.dialect then
		dialect = mw.ustring.lower(mw.text.trim(args.dialect))
	end
	if dialect == "rālik" then
		dialect = "ralik"
	end

	-- Reads one boolean-like option; always yields true or false.
	local function flag(name)
		return not not (args and parseBoolean(args[name]))
	end
	local enunciate = flag("enunciate")
	local liaison = flag("liaison")
	local noHints = flag("nohints")

	-- An empty or absent value stays "", anything else becomes a boolean.
	local voice = ""
	if args and args.voice then
		voice = args.voice
	end
	if voice ~= "" then
		voice = parseBoolean(voice)
	end

	local outSeq = {}
	local config = {
		["outSeq"] = outSeq,
		["enunciate"] = enunciate,
		["liaison"] = liaison,
		["noHints"] = noHints,
		["voice"] = voice
	}

	local singleDialect = dialect == "ralik" or dialect == "ratak"
	for _, text in pairs(inSeq) do
		-- Normalize every run of whitespace and hyphens to one space.
		text = string.gsub(text, "[%s%-]+", " ")
		text = fastTrim(text)
		if singleDialect then
			local reflex = toPhoneticDialect(text, config, dialect == "ralik")
			toPhoneticRemainder(reflex, config)
		else
			local ralik = toPhoneticDialect(text, config, true)
			local ratak = toPhoneticDialect(text, config, false)
			-- If both dialect reflexes are the same, display only one of them.
			toPhoneticRemainder(ralik, config)
			if ratak ~= ralik then
				toPhoneticRemainder(ratak, config)
			end
		end
	end

	-- toPhoneticRemainder() appended its results to config.outSeq.
	return outSeq

end



-- Applies the dialect-specific rewrites to one pseudo-X-SAMPA item.
--
-- text:    the item, with word breaks written as single spaces.
-- config:  accepted for symmetry with toPhoneticRemainder(); not read here.
-- isRalik: true for the Rālik reflexes, false for the Ratak ones.
-- Returns the rewritten item, with protected "I" turned back into "i".
toPhoneticDialect = function(text, config, isRalik)

	-- Morphemes can begin with geminated consonants, but spoken words cannot.
	-- Receives the captures of the geminate pattern and returns the
	-- replacement with a prothetic or inserted vowel.
	local function breakGeminate(prefix, conson, gapA, gapB, vowel)
		local copyVowel
		if vowel == "I" then
			copyVowel = "i"
		elseif vowel == "a" and conson ~= "hG" then
			copyVowel = "E"
		else
			copyVowel = vowel
		end
		local remainder = conson..gapB..vowel
		if isRalik then
			return prefix.."hj"..copyVowel..conson..gapA..remainder
		end
		if conson == "hw" then
			return prefix..conson..copyVowel..conson..gapA..remainder
		end
		return prefix..conson..copyVowel..gapA..remainder
	end

	-- A leading tab streamlines the morpheme-initial patterns below.
	local work = "\t"..text

	work = string.gsub(
		work, "([\tjGw] *)(.[jGw])( *)%2( *)([aEeIi])", breakGeminate
	)

	-- Initial {yiyV-, yiwV-, wiwV-} sequences have special behavior.
	-- To block this in the template argument, use "'i" instead of "i".
	if isRalik then
		-- Rālik {wiwV-} becomes {yiwV-}.
		work = string.gsub(work, "([\tjGw] *h)w( *i *hw *[aEeIi])", "%1j%2")
	end

	-- {[yw]iwV-} becomes {[yw]iwwV-} in both dialects.
	work = string.gsub(work, "([\tjGw] *h[jw] *i *hw)( *[aEeIi])", "%1hw%2")

	-- {yiyV-} sequences
	local yiyReflex
	if isRalik then
		yiyReflex = "%1Yj%2%3%4"
	else
		yiyReflex = "%1yj%2%3%4"
	end
	work = string.gsub(work, "([\tjGw] *)hj( *)i( *)hj( *[aEeIi])", yiyReflex)

	-- Drop the leading tab again.
	work = string.sub(work, 2)

	-- Don't need to protect {i} anymore.
	work = string.gsub(work, "I", "i")

	return work

end



-- Turns one dialect-resolved pseudo-X-SAMPA item into phonetic IPA and
-- appends every resulting pronunciation to config.outSeq (duplicates are
-- dropped by addUnique()). Returns nothing.
--
-- code:   the item, as returned by toPhoneticDialect().
-- config: table with the fields
--   outSeq    sequence that receives the pronunciations;
--   enunciate break at consonant clusters and skip cluster processing;
--   liaison   keep liaison joiners (forced on when enunciating);
--   noHints   strip pseudoconsonant hints instead of displaying them.
--
-- The text is rewritten in place by a long series of gsub passes whose
-- order matters. Temporary markers used along the way:
--   "\n" / "\t"  bookends of full terms / of prosodic units;
--   "=" / "@"    full vowel / epenthetic vowel;
--   "^"          surfaced (asyllabic) semiconsonant;
--   "\"" "%" "," primary stress, secondary stress, unstressed syllable;
--   "/"          tag between the two members of a consonant cluster.
toPhoneticRemainder = function(code, config)

	-- "\n" bookends pronunciations of full terms.
	-- "\t" bookends prosodic breaks within pronunciations.
	local text = "\n\t"..code.."\t\n"
	local oldText

	-- Handle pseudoconsonants and phrases that begin or end with bare vowels.
	-- A bare edge vowel is expanded into three variants, one for each
	-- possible pseudoconsonant.
	local hasLeftVowel = string.find(code, "^_")
	if not hasLeftVowel then
		hasLeftVowel = string.find(code, "^[aEei]")
		if hasLeftVowel then
			text = string.gsub(
				text,
				"\n\t".."([aEei][^\t]*)".."\t\n",
				"\n\t".."_j%1".."\t\n"..
				"\n\t".."_G%1".."\t\n"..
				"\n\t".."_w%1".."\t\n"
			)
		end
	end
	local hasRightVowel = string.find(code, "_.$")
	if not hasRightVowel then
		hasRightVowel = string.find(code, "[aEei]$")
		if hasRightVowel then
			text = string.gsub(
				text,
				"\n\t".."([^\t]-[aEei])".."\t\n",
				"\n\t".."%1_j".."\t\n"..
				"\n\t".."%1_G".."\t\n"..
				"\n\t".."%1_w".."\t\n"
			)
		end
	end
	local hasEdgeVowel = hasLeftVowel or hasRightVowel
	if hasEdgeVowel then
		text = string.gsub(text, "/", "\t\t")
	end

	local enunciate = config.enunciate
	local liaison   = config.liaison
	local noHints   = config.noHints
	local outSeq    = config.outSeq

	-- Use liaison if we're enunciating.
	liaison = liaison or enunciate

	if enunciate then
		-- Create a prosodic break at consonant clusters.
		text = string.gsub(text, "([jGw]) *(.[jGw])", "%1".."\t\t".."%2")
	end
	
	-- Per the Marshallese Reference Grammar.
	if false then
		
		-- Non-phrase-initial {yi'y-} vocalizes to true {yiy}.
		text = string.gsub(text, "([^\t] *)yj", "%1hjihj")
		
	-- Experimental, to fix the iọkiọkwe problem.
	else
		
		-- Non-phrase-initial {yi'y-}
		-- vocalizes to true {yiy} at the beginning of a word,
		-- but not in a non-initial position within a word.
		text = string.gsub(text, " yj", " hjihj")
		
	end

	-- {'yiy} vocalizes contextually.
	do

		-- To {iyy} after a consonant.
		if not enunciate then
			text = string.gsub(text, "([jGw] *)Yj", "%1ihjhj")
		end

		-- To {yiyy} everywhere else.
		text = string.gsub(
			text, "Yj", enunciate and ("hjihj".."\t\t".."hj") or "hjihjhj"
		)

	end

	-- Mid-vowel harmony assimilation across semiconsonants.
	do

		-- Always {e-a}, never {ẹ-a}.
		text = string.gsub(text, "e([ hjGw]*a)", "E%1")

		-- Always {ẹ-i}, never {e-i}.
		text = string.gsub(text, "E([ hjGw]*i)", "e%1")

		-- Always {e-e} and {ẹ-ẹ}, never {e-ẹ} or {ẹ-e}.
		-- Every mid vowel of the matched run takes the quality of the run's
		-- own last mid vowel. The lookup must be done on `match`: searching
		-- the whole `text` would make every run copy the last mid vowel
		-- found anywhere in the string.
		text = string.gsub(text, "[Ee][ hjGw]*[Ee][ hjGwEe]*",
			function(match)
				local index = string.find(match, "[Ee][^Ee]*$")
				local vowel = string.sub(match, index, index)
				match = string.gsub(match, "[Ee]", vowel)
				return match
			end
		)

	end

	-- Detect and mark stressed syllables, but not if this term is an affix.
	if not hasEdgeVowel then

		-- Temporarily mark the end of the term's bookend as stressed.
		text = string.gsub(text, "(\t[\t\n])", "\"%1")

		-- Temporarily mark all natural syllables as unstressed.
		text = string.gsub(text, "(.[jGw] *[aEei])", ",%1")

		-- Recursively place stress before each CVC, CVCV and CVCCV sequence.
		text = string_gsubx(

			text,

			",("..
			".[jGw] *[aEei] *[ptkmnNrlh]?[jGw]? *"..
			",?"..
			".[jGw] *[aEei]? *"..
			"\"[^\t]*\t"..
			")",

			"\"%1"

		)
		
		-- Remove dangling syllable markers from the term's bookends.
		text = string.gsub(text, " *\"? *\t *,? *", "\t")
		
		-- Remove all unstressed syllable markers.
		text = string.gsub(text, ",", "")
		
		if not enunciate then
			-- Restore unstressed syllable markers
			-- only within consonant clusters that are not already stressed.
			-- These will be removed again later anyway.
			text = string.gsub(text, "([jGw] *)(.[jGw])", "%1,%2")
		end
		
		-- If there is more than one stressed syllable,
		-- then mark the penultimate stressed syllable as primarily stressed,
		-- and the others as secondarily stressed.
		if string.find(text, "\"[^\"\t]*\"[^\t]*\t") then
			text = string.gsub(text, "\"", "%%")
			text = string.gsub(text, "%%([^%%\t]*%%[^%%\t]*\t)", "\"%1")
		end

	end
	
	-- Mark full vowels as syllabic.
	text = string.gsub(text, "([aEei])", "%1=")
	
	if not enunciate then

		-- Tag consonant clusters for the next operation.
		oldText = text
		text = string.gsub(text, "(.[jGw])( *[\"%%,]?.[jGw])", "%1/%2")
		
		needClusterTypes()
		
		-- Process unstable and assimilating consonant clusters.
		-- Skipped entirely when the tagging pass found no cluster.
		if oldText ~= text then
			text = string_gsub2(
				text,
				"([aEei])(= *[\"%%,]?)(.)([jGw])/"..
				"( *[\"%%,]?)(.)([jGw])( *)([aEei])",
				function(
					vowelL, _, primaryL, secondaryL,
					__, primaryR, secondaryR, ___, vowelR
				)
					local vowelE = ""
					local markE = ""
					local cluster = clusterTypes[primaryL][primaryR][secondaryR]
					if cluster == EPENTH_CLUSTER then
						-- An epenthetic vowel will be inserted.
						if primaryL == "h" then
							-- If the first consonant is a semiconsonant,
							-- then copy the vowel on the left.
							vowelE = vowelL
						elseif primaryR == "h" then
							-- If the first consonant is a full consonant
							-- but the second consonant is a semiconsonant,
							-- then copy the vowel on the right.
							vowelE = vowelR
						elseif primaryR == "y" then
							-- If the first consonant is a full consonant
							-- but the second consonant is {yi'y},
							-- then the epenthetic vowel is {i},
							-- and the second consonant becomes plain {y}.
							vowelE = "i"
							primaryR = "h"
						else
							-- If neither consonant is a semiconsonant,
							-- then the epenthetic vowel has an F1
							-- that is the maximum of
							-- the two neighboring vowels and {e}.
							vowelE = fromF1[math.max(
								toF1[vowelL],
								toF1[vowelR],
								toF1["E"]
							)]
						end
						markE = "@"
					else
						-- No epenthetic vowel.
						if cluster == ASSIM_CLUSTER then
							-- Regressive primary assimilation.
							primaryL = primaryR
						end
						if	secondaryL == "w" and
							primaryR ~= "t"
						then
							-- Progressive secondary assimilation.
							-- But there is no {tʷ} in Marshallese.
							secondaryR = secondaryL
						else
							-- Regressive secondary assimilation.
							secondaryL = secondaryR
						end
					end
					-- The "/" tag is consumed by not being copied back.
					return (
						vowelL.._..primaryL..secondaryL..vowelE..markE..
						__..primaryR..secondaryR..___..vowelR
					)
				end
			)
		end

	end

	needVowelCharts()
	
	-- Give a default F2 to vowels,
	-- averaging the F2 of their two neighboring consonants.
	-- This can also create transitional vowels whose F2
	-- have no direct counterparts with consonant secondary articulation.
	-- Two passes are needed because neighboring matches share a consonant.
	text = string_gsub2(text, "([jGw])( *.)([=@] *[\"%%,]?.)([jGw])",
		function(secondaryL, _, __, secondaryR)
			return secondaryL.._..lerpF2(secondaryL, secondaryR)..__..secondaryR
		end
	)
	
	-- Unconditionally surface semiconsonants in complete isolation.
	oldText = text
	text = string.gsub(text, "\t *h(.) *\t", "\tH%1\t")
	
	-- If the term contains any other semiconsonants...
	if	oldText == text and
		string.find(text, "h")
	then
		
		local hasVG  = false
		local hasGV  = false
		local hasVGV = false
		
		-- Give unsurfaced semiconsonants a surface F1
		-- matching the vowels on their left.
		text = string.gsub(text, "([aEei])(.[=@] *[\"%%,]?)h(.)",
			function(vowelF1, _, secondary)
				hasVG = true
				return vowelF1.._..vowelF1..fromF2[toF2[secondary]].."^"
			end
		)
	
		-- Adjust the F1 of surfaced semiconsonants
		-- according to the vowels on their right.
		-- To the maximum of the vowel if {y} or {w}.
		-- To the minimum of the vowel if {h}.
		if hasVG then
			text = string.gsub(text, "(.)(.)(%^ *)([aEei])",
				function(semiF1, semiF2, _, vowelF1)
					hasGV = true
					hasVGV = true
					local fn = semiF2 == "3" and math.min or math.max
					return fromF1[fn(
						toF1[semiF1], toF1[vowelF1]
					)]..semiF2.._..vowelF1
				end
			)
		end
	
		-- Give remaining unsurfaced semiconsonants a surface F1
		-- matching the vowels on their right.
		text = string.gsub(text, "h(.)( *)([aEei])",
			function(secondary, _, vowelF1)
				hasGV = true
				return vowelF1..fromF2[toF2[secondary]].."^".._..vowelF1
			end
		)
		
		-- NOTE(review): startsGV and endsVG are computed but not read
		-- anywhere in this function.
		local startsGV = hasGV and not not string.find(text, "\t *[\"%%,]?..%^")
		local endsVG   = hasVG and not not string.find(text, "%^ *\t")
		
		if not enunciate then
			
			-- If a vowel comes before a semiconsonant of the same F1,
			-- then change the vowel's F2 to match the semiconsonant.
			if hasVG then
				text = string.gsub(
					text, "(.).([=@] *[\"%%,]?)%1(.)", "%1%3%2%1%3"
				)
			end
			
			-- If a non-open vowel comes after {y} of the same F1
			-- and before a velarized full consonant,
			-- then change the vowel's F2 to match the {y}.
			if hasGV then
				text = string.gsub(
					text,
					"([Eei])(1)(%^ *)%1.([=@] *[\"%%,]?[ptkmnNrl]G)",
					"%1%2%3%1%2%4"
				)
			end
			
			-- If a non-open vowel comes after {y} of the same F1
			-- and before a syllable stress boundary,
			-- then change the vowel's F2 to match the {y}.
			if hasGV then
				text = string.gsub(
					text, "([Eei])(1)(%^ *)%1.([=@] *[\"%%,])", "%1%2%3%1%2%4"
				)
			end
			
			-- If {a} comes after {y} of the same F1 after a stressed vowel,
			-- then change the vowel's F2 to match the {y}.
			if hasVGV then
				text = string.gsub(text, "(= *a)(1)(%^ *a).", "%1%2%3%2")
			end
			
			-- If a vowel comes after {w} of the same F1 after a stressed vowel,
			-- then change the vowel's F2 to match the {w}.
			if hasVGV then
				text = string_gsub2(
					text, "(= *)(.)(5)(%^ *)%2.", "%1%2%3%4%2%3"
				)
			end
			
			-- If a vowel comes after {h}...
			if hasGV then
				text = string.gsub(
					text, "(.)(3)(%^ *)(.).([=@] *[\"%%,]?.)([jw15])",
					function(semiF1, semiF2, _, vowelF1, __, secondary)
						local vowelF2
						if semiF1 == vowelF1 then
							-- If they have the same F1,
							-- then change the vowel's F2 to match the {h}.
							vowelF2 = semiF2
						else
							-- If they do not have the same F1,
							-- then reset the vowel's F2.
							vowelF2 = lerpF2(semiF2, secondary)
						end
						return (
							semiF1..semiF2.._..vowelF1..vowelF2..__..secondary
						)
					end
				)
			end
			
			-- If a vowel comes after {y} or {w}
			-- at the beginning of a prosodic unit
			-- and before a stress boundary
			-- before a semiconsonant and another vowel
			-- that have the same F2 as each other
			-- and both have the same F1 as the first vowel,
			-- then change the first vowel's F2 to match.
			if hasVGV then
				text = string.gsub(
					text,
					"\t *(.[15]%^ *)(.).([=@] *[\"%%,])%2(.)(%^ *)%2%4",
					"\t%1%2%4%3%2%4%5%2%4"
				)
			end
			
		end
		
		-- Unsurface {h} everywhere.
		text = string.gsub(text, ".3%^", "hG")
		
		-- Unsurface semiconsonants that can coalesce
		-- with either of their neighboring vowels,
		-- but not crossing syllable stress boundaries.
		if hasGV then
			text = string.gsub(text, "(.)(.)%^( *)%1%2",
				function(vowelF1, vowelF2, _)
					return "h"..fromF2Conson[toF2[vowelF2]].._..vowelF1..vowelF2
				end
			)
		end
		if hasVG then
			text = string.gsub(text, "(.)(.)(= *)%1%2%^",
				function(vowelF1, vowelF2, _)
					return vowelF1..vowelF2.._.."h"..fromF2Conson[toF2[vowelF2]]
				end
			)
		end
		
		-- Adjust the F1 of remaining surfaced {y} and {w}.
		text = string.gsub(text, "(.)([15])%^", function(semiF1, semiF2)
			if semiF2 == "1" then
				if semiF1 == "a" then
					semiF1 = "E"
				end
			else -- semiF2 == "5"
				semiF1 = "i"
			end
			return semiF1..semiF2.."^"
		end)
		
		-- Delete remaining unsurfaced semiconsonants altogether.
		text = string.gsub(text, "h.", "")
		
		if hasVGV and not enunciate then
			-- Indicate certain long monophthongs as geminated.
			text = string.gsub(text, "([aEei].)[=@]( *)%1[=@]", "%1=%2:")
			text = string.gsub(
				text, "([aEei].)[=@]( *[\"%%,])%1[=@]([^:])", "%1=%2:%3"
			)
		end
		
		-- If a weakened semiconsonant falls on a stressed syllable
		-- before a vowel with the same F2,
		-- then shift forward the stress marker.
		-- NOTE(review): no earlier step visible in this file produces "0",
		-- so this pass looks like it can never match; confirm before
		-- removing.
		text = string.gsub(text, "([\"%%,])0(.)( *[aEei])(.)",
			function(stress, semiF2, _, vowelF2)
				if toF2[semiF2] == toF2[vowelF2] then
					return "0"..semiF2..stress.._..vowelF2
				end
			end
		)
		
	end
	
	-- Neutralize the difference between full and epenthetic vowels.
	text = string.gsub(text, "[=@]", "")
	
	-- Simplify secondary articulation of consonant clusters.
	text = string.gsub(text, "([jGw])( *[\"%%,]?.)%1", "%2%1")

	-- Partially voice obstruents before vowels at the beginning of a phrase or
	-- in consonant clusters after other obstruents or laterals.
	-- Partial voicing is encoded by uppercasing the primary.
	text = string.gsub(text, "([ptkl\t] *[\"%%,]?)([ptk])(. *[aEei])",
		function(_, primary, __)
			return _..string.upper(primary)..__
		end
	)
	
	voicedPrimaries = voicedPrimaries or {
		["p"] = "b", ["t"] = "d", ["k"] = "g"
	}
	
	-- Voice remaining obstruents before vowels.
	text = string.gsub(text, "([ptk])(. *%(?[aEei])", function(primary, _)
		return voicedPrimaries[primary].._
	end)
	
	if hasEdgeVowel then
		if noHints then
			-- Strip pseudoglides.
			text = string.gsub(text, "_.", "")
		elseif hasLeftVowel then
			-- Reverse text of left pseudoglide.
			text = string.gsub(text, "\t *_(.)", "\t%1_")
		end
	end

	if liaison then

		-- Remove whitespace from bookends.
		text = string.gsub(text, " *\t *", "\t")

		-- Prepare liaisons.
		text = string.gsub(text, "[ _]+", "_")

	else

		-- Strip liaisons.
		text = string.gsub(text, " ", "")

	end

	if enunciate then
		-- Convert bookends to spaces.
		text = string.gsub(text, "\t+", " ")
	end

	needPhoneticMap()

	-- Convert pseudo-X-SAMPA to phonetic IPA.
	text = string.gsub(text, ".[jGw1-5]?%^?", phoneticMap)

	-- Output unique pronunciations.
	-- Each "\n"-delimited segment is one pronunciation; gsub is used only
	-- to iterate over them and its result is discarded.
	string.gsub(text, "\n[^\n]*\n", function(result)
		addUnique(outSeq, fastTrim(result))
		return ""
	end)

end



-- Expose the internal functions directly, under underscore-prefixed names.
-- Unlike the frame-based wrappers below, these take and return plain Lua
-- values (strings, sequences and option tables).
export._parse = parse
export._toBender = toBender
export._toMOD = toMOD
export._toPhonemic = toPhonemic
export._toPhonetic = toPhonetic

-- Frame entry point: parses the first argument and returns its Bender
-- orthography spellings joined with ", ".
-- The frame's arguments are handed to toBender() so that a `version`
-- argument selects the orthography; parse() takes the code only.
function export.bender(frame)
	return table.concat(toBender(parse(frame.args[1]), frame.args), ", ")
end

-- Frame entry point: returns the first argument converted by toMOD().
function export.MOD(frame)
	local text = frame.args[1]
	return toMOD(text)
end

-- Frame entry point: parses the first argument and returns the resulting
-- pseudo-X-SAMPA items joined with ", ".
function export.parse(frame)
	local items = parse(frame.args[1])
	return table.concat(items, ", ")
end

-- Frame entry point: parses the first argument and returns its phonemic
-- transcriptions joined with ", ".
function export.phonemic(frame)
	local items = parse(frame.args[1])
	local transcriptions = toPhonemic(items)
	return table.concat(transcriptions, ", ")
end

-- Frame entry point: parses the first argument and returns its phonetic
-- transcriptions joined with ", ". The frame's arguments are passed on as
-- the options table of toPhonetic().
function export.phonetic(frame)
	local items = parse(frame.args[1])
	local transcriptions = toPhonetic(items, frame.args)
	return table.concat(transcriptions, ", ")
end

-- The module's value is the table of public entry points.
return export