Module:User:Surjection/urj-fin-pro-IPA

This is a private module sandbox of Surjection, for their own experimentation. Items in this module may be added and removed at Surjection's discretion; do not rely on this module's stability.


-- This is a toy not meant for serious use.

local export = {}

-- Language code handed to Module:languages by export.show.
local langcode = "urj-fin-pro"

local uchar = mw.ustring.char

-- Length and stress marks used in the transcriptions.
local LONG = "ː"
local SEMILONG = "ˑ"
local PRIMARY_STRESS = "ˈ"
local SECONDARY_STRESS = "ˌ"
-- Mark inserted by the automatic secondary-stress pass; currently the same
-- character as an explicit secondary stress.
local AUTO_STRESS = "ˌ"

-- Combining diacritics.
local NONSYLLABIC = uchar(0x032F) -- U+032F COMBINING INVERTED BREVE BELOW
local UNRELEASED = uchar(0x031A)  -- U+031A COMBINING LEFT ANGLE ABOVE
local CENTRAL = uchar(0x031E)     -- U+031E COMBINING DOWN TACK BELOW

-- Pattern for one character followed by any number of combining marks from
-- the U+0300..U+036F block.
local GRAPHEME = ".[" .. uchar(0x0300) .. "-" .. uchar(0x036F) .. "]*"

-- Orthographic vowel -> IPA symbol.
local vowels = {
	["a"] = "ɑ",
	["e"] = "e",
	["i"] = "i",
	["o"] = "o",
	["u"] = "u",
	["ä"] = "æ",
	["ë"] = "ɤ",
	["ö"] = "ø",
	["ü"] = "y",
}

-- Orthographic consonant -> IPA symbol.
local consonants = {
	["p"] = "p", ["t"] = "t", ["k"] = "k",
	["b"] = "β", ["d"] = "ð", ["g"] = "ɣ",
	["c"] = "ʦ", ["s"] = "s", ["h"] = "h",
	["m"] = "m", ["n"] = "n",
	["l"] = "l", ["r"] = "r",
	["j"] = "j", ["v"] = "ʋ",
}

-- Concatenate every value of `map` into one string. The iteration order of
-- pairs() is unspecified, so the result is only suitable for use inside a
-- pattern character class.
local function joined_values(map)
	local buffer = {}
	for _, symbol in pairs(map) do
		buffer[#buffer + 1] = symbol
	end
	return table.concat(buffer)
end

local IPA_VOWELS = joined_values(vowels)
local IPA_CONSONANTS = joined_values(consonants)

--- Convert a form written in the module's orthography into IPA.
--
-- Input conventions handled here: a leading "-" marks a suffix and a trailing
-- "-" marks a prefix (both hyphens are re-attached to the result); any other
-- "-" becomes a secondary stress mark; a space starts a new word that gets its
-- own primary stress; doubled vowels and doubled k/p/t/c/n/m/l/r/s are long;
-- "'" after k/p/t/c marks a semi-long stop.
--
-- @param text         the form to transcribe
-- @param is_phonetic  truthy for the narrow transcription (b/d/g kept as
--                     fricatives or post-nasal stops, semi-long stops kept,
--                     unreleased final k, automatic secondary stress, lowered
--                     mid vowels); falsy for the phonemic transcription, in
--                     which b/d/g merge with p/t/k and semi-long with long
-- @return the transcription, without surrounding slashes or brackets
function export.convert(text, is_phonetic)
	local prefix = mw.ustring.find(text, "%-$")
	local suffix = mw.ustring.find(text, "^%-")

	if prefix then text = mw.ustring.gsub(text, "%-$", "") end
	if suffix then text = mw.ustring.gsub(text, "^%-", "") end

	-- Length marking. "ü" is the spelling used by the vowel table and by the
	-- diphthong rules below, so it has to be listed here for "üü" to become
	-- long; "y" is still accepted so that existing input keeps working.
	text = mw.ustring.gsub(text, "([aeiouäëöüy])%1", "%1" .. LONG)
	text = mw.ustring.gsub(text, "([kptc])'", "%1" .. SEMILONG)
	text = mw.ustring.gsub(text, "([kptcnmlrs])%1", "%1" .. LONG)

	if not is_phonetic then
		-- weak and semi-long consonants were probably allophones
		text = mw.ustring.gsub(text, "[bdg]", {["b"] = "p", ["d"] = "t", ["g"] = "k"})
		text = mw.ustring.gsub(text, SEMILONG, LONG)
	end

	-- Second components of diphthongs are marked non-syllabic.
	-- NOTE(review): the first class lists "u" twice and has no "ö"/"ü";
	-- possibly one of those was intended. Left unchanged, confirm.
	text = mw.ustring.gsub(text, "([aeouäëu])i", "%1" .. "i" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, "([aeëio])u", "%1" .. "u" .. NONSYLLABIC)
	text = mw.ustring.gsub(text, "([äeiö])ü", "%1" .. "ü" .. NONSYLLABIC)

	text = mw.ustring.gsub(text, "n([kg])", "ŋ%1")
	text = mw.ustring.gsub(text, "%-", SECONDARY_STRESS)

	text = mw.ustring.gsub(text, "[aeiouäëöü]", vowels)
	text = mw.ustring.gsub(text, "[bcdghjklmnprstv]", consonants)

	if is_phonetic then
		-- After a nasal the weak-grade consonants are stops, not fricatives.
		text = mw.ustring.gsub(text, "mβ", "mb")
		text = mw.ustring.gsub(text, "nð", "nd")
		text = mw.ustring.gsub(text, "ŋɣ", "ŋg")

		text = mw.ustring.gsub(text, "k$", "k" .. UNRELEASED)
	end

	if suffix then
		text = "-" .. text
	else
		text = PRIMARY_STRESS .. text
	end
	text = mw.ustring.gsub(text, " ", " " .. PRIMARY_STRESS)

	if prefix then
		text = text .. "-"
	end

	if is_phonetic then
		-- automatic secondary stress
		local IPA_VOWEL = "^[" .. IPA_VOWELS .. "]$"
		local STRESS_MARK = "[" .. PRIMARY_STRESS .. SECONDARY_STRESS .. "]"

		-- Pass 1: split into graphemes, classify each one and find the
		-- vowels that must not receive an automatic stress: the vowel
		-- directly preceding any stress mark and the very last vowel.
		-- A vowel carrying a combining mark (e.g. the non-syllabic half of
		-- a diphthong) does not match IPA_VOWEL and is not counted.
		local phones = {}
		local kinds = {}
		local vowels_total = 0
		local do_not_stress = {}

		for phone in mw.ustring.gmatch(text, GRAPHEME) do
			local index = #phones + 1
			phones[index] = phone
			if mw.ustring.match(phone, IPA_VOWEL) then
				kinds[index] = "vowel"
				vowels_total = vowels_total + 1
			elseif mw.ustring.match(phone, STRESS_MARK) then
				kinds[index] = "stress"
				do_not_stress[vowels_total] = true
			end
		end

		do_not_stress[vowels_total] = true

		-- Pass 2: rebuild the text, inserting AUTO_STRESS before a
		-- stressable vowel once two stressable vowels have been passed
		-- since the last stress mark.
		-- NOTE(review): the counter restarts at 0 after an automatic
		-- stress, so stresses land on the 3rd, 6th, 9th ... vowel rather
		-- than 3rd, 5th, 7th; confirm this spacing is intended.
		local pieces = {}
		local vowels_found = 0
		local distance = 0

		for index, phone in ipairs(phones) do
			if kinds[index] == "vowel" then
				vowels_found = vowels_found + 1
				if not do_not_stress[vowels_found] then
					if distance == 2 then
						pieces[#pieces + 1] = AUTO_STRESS
						distance = 0
					else
						distance = distance + 1
					end
				end
			elseif kinds[index] == "stress" then
				distance = 0
			end

			pieces[#pieces + 1] = phone
		end

		text = table.concat(pieces)

		-- The original code had a branch here that read an undefined global
		-- (last_vowel). It could never run, because the text always starts
		-- with PRIMARY_STRESS or "-", and the final vowel is already
		-- excluded through do_not_stress, so the branch has been dropped.

		-- Move the stress mark in front of the single consonant preceding
		-- it. "b", "d" and "g" come from the post-nasal rules above and are
		-- not values of the consonant table, so they are listed explicitly.
		text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "bdg])" .. AUTO_STRESS, AUTO_STRESS .. "%1")

		text = mw.ustring.gsub(text, "([eoɤø])", "%1" .. CENTRAL)
	end

	-- Use the IPA letter for g and a tie bar for the affricate.
	text = mw.ustring.gsub(text, "[gʦ]", { ["g"] = "ɡ", ["ʦ"] = "t͡s" } )

	return text
end

-- When true, convert_UPA moves stress marks behind the following vowel
-- cluster and rewrites them as "·" (primary) and ":" (secondary).
local use_UPA_stress = true

-- Diacritics and modifier letters used by convert_UPA.
local UPA_SHORT = mw.ustring.char(0x0306)      -- U+0306 COMBINING BREVE
local UPA_LONG = mw.ustring.char(0x0304)       -- U+0304 COMBINING MACRON
local UPA_DIPHTHONG = mw.ustring.char(0x0361)  -- U+0361 COMBINING DOUBLE INVERTED BREVE
local UPA_UNRELEASED = mw.ustring.char(0x02FE) -- U+02FE MODIFIER LETTER OPEN SHELF

-- Vowels whose UPA spelling differs from the input spelling.
local UPA_vowels = {
	["ë"] = "e̮",
}

-- Consonants whose UPA spelling differs from the input spelling.
local UPA_consonants = {
	["b"] = "β",
	["d"] = "δ",
	["g"] = "γ",
	["c"] = "ʦ",
}

--- Convert a form written in the module's orthography into a UPA-style
-- transcription (UPA presumably being the Uralic Phonetic Alphabet).
--
-- The input conventions are the same as for export.convert: a leading "-"
-- marks a suffix and a trailing "-" a prefix (both are re-attached), any other
-- "-" is a secondary stress, a space starts a new primary-stressed word,
-- doubled vowels are long and "'" after k/p/t/c marks a semi-long stop.
--
-- @param text         the form to transcribe
-- @param is_phonetic  truthy for the narrow transcription (b/d/g kept,
--                     semi-long stops kept, unreleased final k, automatic
--                     secondary stress); falsy for the phonemic one
-- @return the transcription string
function export.convert_UPA(text, is_phonetic)
	local prefix = mw.ustring.find(text, "%-$")
	local suffix = mw.ustring.find(text, "^%-")

	if prefix then text = mw.ustring.gsub(text, "%-$", "") end
	if suffix then text = mw.ustring.gsub(text, "^%-", "") end

	-- Long vowels get a macron. "ü" is the spelling used everywhere else in
	-- this function, so it has to be listed for "üü" to become long; "y" is
	-- still accepted so that existing input keeps working.
	text = mw.ustring.gsub(text, "([aeiouäëöüy])%1", "%1" .. UPA_LONG)
	-- Semi-long stops are written as a breve-marked stop plus a plain one.
	text = mw.ustring.gsub(text, "([kptc])'", "%1" .. UPA_SHORT .. "%1")

	if not is_phonetic then
		-- weak and semi-long consonants were probably allophones
		text = mw.ustring.gsub(text, "[bdg]", {["b"] = "p", ["d"] = "t", ["g"] = "k"})
		-- Semi-long stops are marked with UPA_SHORT in this function (not
		-- with the IPA SEMILONG sign), so merging them with the plain
		-- geminates means dropping that breve.
		text = mw.ustring.gsub(text, UPA_SHORT, "")
	end

	-- Diphthongs are joined with a tie.
	-- NOTE(review): the first class lists "u" twice and has no "ö"/"ü";
	-- possibly one of those was intended. Left unchanged, confirm.
	text = mw.ustring.gsub(text, "([aeouäëu])i", "%1" .. UPA_DIPHTHONG .. "i")
	text = mw.ustring.gsub(text, "([aeëio])u", "%1" .. UPA_DIPHTHONG .. "u")
	text = mw.ustring.gsub(text, "([äeiö])ü", "%1" .. UPA_DIPHTHONG .. "ü")

	text = mw.ustring.gsub(text, "n([kg])", "ŋ%1")
	text = mw.ustring.gsub(text, "%-", SECONDARY_STRESS)

	text = mw.ustring.gsub(text, "[ë]", UPA_vowels)
	text = mw.ustring.gsub(text, "[bcdg]", UPA_consonants)

	if is_phonetic then
		-- After a nasal the weak-grade consonants are stops, not fricatives.
		text = mw.ustring.gsub(text, "mβ", "mb")
		text = mw.ustring.gsub(text, "nδ", "nd")
		text = mw.ustring.gsub(text, "ŋγ", "ŋg")

		text = mw.ustring.gsub(text, "k$", "k" .. UPA_UNRELEASED)
	end

	if suffix then
		text = "-" .. text
	else
		text = PRIMARY_STRESS .. text
	end
	text = mw.ustring.gsub(text, " ", " " .. PRIMARY_STRESS)

	if prefix then
		text = text .. "-"
	end

	if is_phonetic then
		-- automatic secondary stress

		-- A syllable nucleus: a vowel letter, optionally with the mark used
		-- in UPA_vowels and optionally with the length macron. Without the
		-- macron alternative long vowels would not be counted as syllables.
		-- The first half of a diphthong carries UPA_DIPHTHONG and therefore
		-- does not match, so a diphthong is counted once.
		local UPA_VOWEL = "^[aeiouäöü]̮?" .. UPA_LONG .. "?$"
		local STRESS_MARK = "[" .. PRIMARY_STRESS .. SECONDARY_STRESS .. "]"

		-- Pass 1: split into graphemes, classify each one and find the
		-- vowels that must not receive an automatic stress: the vowel
		-- directly preceding any stress mark and the very last vowel.
		local phones = {}
		local kinds = {}
		local vowels_total = 0
		local do_not_stress = {}

		for phone in mw.ustring.gmatch(text, GRAPHEME) do
			local index = #phones + 1
			phones[index] = phone
			if mw.ustring.match(phone, UPA_VOWEL) then
				kinds[index] = "vowel"
				vowels_total = vowels_total + 1
			elseif mw.ustring.match(phone, STRESS_MARK) then
				kinds[index] = "stress"
				do_not_stress[vowels_total] = true
			end
		end

		do_not_stress[vowels_total] = true

		-- Pass 2: rebuild the text, inserting AUTO_STRESS before a
		-- stressable vowel once two stressable vowels have been passed
		-- since the last stress mark.
		-- NOTE(review): the counter restarts at 0 after an automatic
		-- stress, so stresses land on the 3rd, 6th, 9th ... vowel rather
		-- than 3rd, 5th, 7th; confirm this spacing is intended.
		local pieces = {}
		local vowels_found = 0
		local distance = 0

		for index, phone in ipairs(phones) do
			if kinds[index] == "vowel" then
				vowels_found = vowels_found + 1
				if not do_not_stress[vowels_found] then
					if distance == 2 then
						pieces[#pieces + 1] = AUTO_STRESS
						distance = 0
					else
						distance = distance + 1
					end
				end
			elseif kinds[index] == "stress" then
				distance = 0
			end

			pieces[#pieces + 1] = phone
		end

		text = table.concat(pieces)

		-- The original code had a branch here that read an undefined global
		-- (last_vowel). It could never run, because the text always starts
		-- with PRIMARY_STRESS or "-", and the final vowel is already
		-- excluded through do_not_stress, so the branch has been dropped.

		-- Move the stress mark in front of the single consonant preceding it.
		text = mw.ustring.gsub(text, "([" .. IPA_CONSONANTS .. "γβδ])" .. AUTO_STRESS, AUTO_STRESS .. "%1")
	end

	text = mw.ustring.gsub(text, "[gʦ]", { ["g"] = "ɡ", ["ʦ"] = "t͡s" } )

	if use_UPA_stress then
		-- Move each stress mark behind the next run of vowel letters and
		-- combining marks, then swap in the "·" and ":" signs.
		text = mw.ustring.gsub(text, "([ˈˌ])(.-)([aeiouäöü" .. mw.ustring.char(0x0300) .. "-" .. mw.ustring.char(0x036F) .. "]+)", "%2%3%1")
		text = mw.ustring.gsub(text, "ˈ", "·")
		text = mw.ustring.gsub(text, "ˌ", ":")
	end

	return text
end

--- Template entry point: formats the phonemic and phonetic IPA of a term.
--
-- The term is the first argument of the parent frame. When that argument is
-- absent, the current page title with a leading "Proto-Finnic/" removed is
-- used instead. If `frame` is not a table, the page title is used unchanged.
--
-- @param frame  the frame object passed by #invoke (or a non-table value)
-- @return the result of Module:IPA's format_IPA_full for both transcriptions
function export.show(frame)
	local term = mw.title.getCurrentTitle().text

	if type(frame) == "table" then
		local supplied = frame:getParent().args[1]
		if supplied then
			term = supplied
		else
			-- Plain assignment keeps only gsub's first result (the string).
			term = mw.ustring.gsub(term, "^Proto-Finnic/", "")
		end
	end

	-- Both transcriptions carry a leading "*" inside the delimiters' prefix.
	local transcriptions = {
		{pron = "*/" .. export.convert(term, false) .. "/"},
		{pron = "*[" .. export.convert(term, true) .. "]"},
	}

	local m_IPA = require("Module:IPA")
	local lang = require("Module:languages").getByCode(langcode)

	return m_IPA.format_IPA_full(lang, transcriptions)
end

return export