Module:is-pronunciation

Note: This module is unfinished and should NOT be used in entries.

This module generates IPA from Icelandic orthography, using the rules given at Icelandic orthography.

Testcases

4 of 50 tests failed. (refresh)

Text | Expected | Actual
test_pron:
Passedþornˈθɔrtn̥ˈθɔrtn̥
Passedhiminnˈhɪːmɪnːˈhɪːmɪnː
Passedbrúnnˈprutn̥ˈprutn̥
Passedsteinnˈstɛi̯tn̥ˈstɛi̯tn̥
Failedgeimsteinn (respelled geim-steinn)ˈcɛi̯mstɛi̯tn̥ˈcɛi̯mˌstɛi̯tn̥
Passedkarlˈkʰartl̥ˈkʰartl̥
Passedruslˈrʏstl̥ˈrʏstl̥
Passedbysnaˈpɪstn̥aˈpɪstn̥a
Passedráps (respelled ráp.s)ˈrau̯ːpsˈrau̯ːps
Passedtakaˈtʰaːkaˈtʰaːka
Passedþökkˈθœhkˈθœhk
Passedvopnˈvɔhpn̥ˈvɔhpn̥
Passedbrotnaˈprɔhtn̥aˈprɔhtn̥a
Passedsaknaˈsahkn̥aˈsahkn̥a
Passedkembtˈcʰɛm̥tˈcʰɛm̥t
Passedþiðˈθɪːðˈθɪːð
Passedguðˈkvʏːðˈkvʏːð
Passedbyggjaˈpɪcːaˈpɪcːa
Passedsyngjaˈsincaˈsinca
Passedmunkurˈmuŋkʏrˈmuŋkʏr
Passedöngullˈœy̯ŋkʏtl̥ˈœy̯ŋkʏtl̥
Passeddrengurˈtrɛi̯ŋkʏrˈtrɛi̯ŋkʏr
Passedsvangurˈsvau̯ŋkʏrˈsvau̯ŋkʏr
PassedEnglandˈɛi̯ŋlantˈɛi̯ŋlant
Passedsegjaˈsɛi̯ːjaˈsɛi̯ːja
Passedflugaˈflʏːɣaˈflʏːɣa
Passedfljúgaˈfljuːaˈfljuːa
Passedbógurˈpou̯ːʏrˈpou̯ːʏr
Passedlágurˈlau̯ːʏrˈlau̯ːʏr
Passedprófaˈpʰrou̯ːaˈpʰrou̯ːa
Passeddagsˈtaxsˈtaxs
Passeddragtˈtraxtˈtraxt
Failedguðspjall (respelled guð-spjall)ˈkvʏðspjatl̥ˈkvʏðˌspjatl̥
Passedseptemberˈsɛftɛmpɛrˈsɛftɛmpɛr
Passedoktóberˈɔxtou̯pɛrˈɔxtou̯pɛr
Passedgjaldaˈcaltaˈcalta
Passedgetaˈcɛːtaˈcɛːta
Passedkjósaˈcʰou̯ːsaˈcʰou̯ːsa
Failedkeyraˈcʰɛi̯ːraˈcʰɛi̯raː
Passedkirkjaˈcʰɪrcaˈcʰɪrca
Passedhlýrˈl̥iːrˈl̥iːr
Passedhrattˈr̥ahtˈr̥aht
Passedsparaˈspaːraˈspaːra
Passedþykjaˈθɪːcaˈθɪːca
Passedlofaˈlɔːvaˈlɔːva
Passedrósˈrou̯ːsˈrou̯ːs
Passedvaxaˈvaxsaˈvaxsa
Failedmylla (respelled myl-la)ˈmɪlːaˈmɪlˌlaː
Passednuddaˈnʏtːaˈnʏtːa
Passedkaþólikkiˈkʰaːθou̯lɪhcɪˈkʰaːθou̯lɪhcɪ

-- table of functions exported by this module (returned at the end of the file)
local export = {}

-- language object for code "is" and script object for code "Latn";
-- both are passed to the tagging, linking and IPA-formatting helpers below
local lang = require("Module:languages").getByCode("is")
local sc = require("Module:scripts").getByCode("Latn")
-- IPA formatting module, used by export.show
local m_ipa = require("Module:IPA")

-- Tags `text` with this module's language and script objects.
-- `face` is handed through unchanged to Module:script utilities.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

-- Builds a full link to `term` using this module's language and script
-- objects. `face` is handed through unchanged to Module:links.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

-- local aliases for the mw.ustring string functions used throughout this module
local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local lower = mw.ustring.lower

-- IPA symbols, built from code points via Module:string/char
local U = require("Module:string/char")
local nonsyllabic = U(0x32F)    -- inverted breve below
local voiceless = U(0x325)      -- combining ring below
local long = U(0x2D0)           -- triangular colon
local primary_stress = "ˈ"
local secondary_stress = "ˌ"

-- consonant letters, and a pattern class matching exactly one of them
local consonants = "bdðfghjklmnprstvxþ"
local consonant = "[" .. consonants .. "]"

-- vowel symbols, and a pattern matching a run of one or more of them
-- followed by an optional non-syllabic mark and an optional length mark
local vowels = "aɛɪiʏyœɔou"
local vowel = "[" .. vowels .. "]+" .. nonsyllabic .. "?" .. long .. "?"

-- pattern class matching either stress mark
local stress = "[" .. primary_stress .. secondary_stress .. "]"

-- pronunciation data
-- Lookup tables mapping spellings to IPA fragments. export.toIPA consults
-- "initial" once for the start of the term, then at every later position
-- tries "trigraphs", "digraphs", "vowels" and "internal" in that order.
local data = {
	-- word-initial consonants
	-- (looked up with the first two characters of the term, then the first one)
	["initial"] = {
		["b"] = "p",
		["d"] = "t",
		["g"] = "k",
		["p"] = "pʰ",
		["t"] = "tʰ",
		["k"] = "kʰ",
		["f"] = "f",
		["gj"] = "c",
		["kj"] = "cʰ",
		["hv"] = "kv",
		["þ"] = "θ",
		["hl"] = "l" .. voiceless,
		["hn"] = "n" .. voiceless,
		["hr"] = "r" .. voiceless,
		["hj"] = "ç"
	},
	-- three-letter sequences, matched before digraphs
	["trigraphs"] = {
		["fnd"] = "mt",
		["fnt"] = "m" .. voiceless .. "t",
		["mbd"] = "mt",
		["mbg"] = "mk",
		["mbs"] = "ms",
		["mbt"] = "m" .. voiceless .. "t"
	},
	-- two-letter sequences (consonant clusters and vowel digraphs);
	-- more entries are added by the loop that follows this table
	-- NOTE: export.toIPA replaces every "y" with "i" before any lookup, so
	-- the spelling "ey" reaches this table as "ei"
	["digraphs"] = {
		["kj"] = "c",
		["ll"] = "tl" .. voiceless,
		["rl"] = "rtl" .. voiceless,
		["rn"] = "rtn" .. voiceless,
		["sl"] = "stl" .. voiceless,
		["sn"] = "stn" .. voiceless,
		["au"] = "œy" .. nonsyllabic,
		["ei"] = "ɛi" .. nonsyllabic,
		["ey"] = "ɛi" .. nonsyllabic
	},
	-- word-internal consonants
	-- (only consulted when the position is not 1; letters absent from this
	-- table are copied to the output unchanged)
	["internal"] = {
		["b"] = "p",
		["d"] = "t",
		["x"] = "xs",
		["f"] = "v",
		["þ"] = "θ"
	},
	-- vowels: regular, before gi, before ng/nk
	-- (each entry is an array: [1] regular, [2] before "gi", [3] before "ng"/"nk")
	["vowels"] = {
		["a"] = {
			"a",
			"ai" .. nonsyllabic,
			"au" .. nonsyllabic
		},
		["á"] = {
			"au" .. nonsyllabic,
			"au" .. nonsyllabic,
			"au" .. nonsyllabic
		},
		["e"] = {
			"ɛ",
			"ei" .. nonsyllabic,
			"ɛi" .. nonsyllabic
		},
		["é"] = {
			"jɛ",
			"jɛ",
			"jɛ"
		},
		["i"] = {
			"ɪ",
			"i",
			"i"
		},
		["í"] = {
			"i",
			"i",
			"i"
		},
		["o"] = {
			"ɔ",
			"ɔi" .. nonsyllabic,
			"ɔi" .. nonsyllabic
		},
		["ó"] = {
			"ou" .. nonsyllabic,
			"ou" .. nonsyllabic,
			"ou" .. nonsyllabic
		},
		["u"] = {
			"ʏ",
			"ʏi" .. nonsyllabic,
			"u"
		},
		["ú"] = {
			"u",
			"u",
			"u"
		},
		["æ"] = {
			"ai" .. nonsyllabic,
			"ai" .. nonsyllabic,
			"ai" .. nonsyllabic
		},
		["ö"] = {
			"œ",
			"œy" .. nonsyllabic,
			"œy" .. nonsyllabic
		}
	}
}

-- add data for preaspirated stop clusters:
-- a doubled stop (pp, tt, kk) becomes "h" + stop, and a stop followed by
-- l, m or n becomes "h" + stop + that letter with the voiceless mark
for _, stop in ipairs({ "p", "t", "k" }) do
	local preaspirated = "h" .. stop
	data.digraphs[stop .. stop] = preaspirated
	for _, sonorant in ipairs({ "l", "m", "n" }) do
		data.digraphs[stop .. sonorant] = preaspirated .. sonorant .. voiceless
	end
end

-- list pronunciation substitutions
-- Each key is a pattern and each value its replacement; export.toIPA runs
-- them with gsub over the assembled IPA string. The numbered sets are applied
-- in order (1 to 4, via ipairs).
-- NOTE(review): the rules inside one set are iterated with pairs, so their
-- relative order is unspecified -- confirm no rule in a set depends on another
-- rule of the same set having run first.
local rules = {
	[1] = {
		-- "nn" after a stress mark, optional consonants and a vowel -> "tn" + voiceless mark
		["(" .. stress .. consonant .. "*" .. vowel .. ")nn"] = "%1tn" .. voiceless,
		-- "g" between a vowel and a/ʏ/ð/l/r -> "ɣ"
		["(" .. vowel .. ")" .. "g" .. "([aʏðlr])"] = "%1ɣ%2",
		-- "g" between a vowel and j/i -> "j"
		["(" .. vowel .. ")" .. "g" .. "([ji])"] = "%1j%2",
		-- "k" or "g" between a vowel and t/s -> "x"
		["(" .. vowel .. ")" .. "[kg]" .. "([ts])"] = "%1x%2",
		-- "p" between a vowel and t/s/k -> "f"
		["(" .. vowel .. ")" .. "p" .. "([tsk])"] = "%1f%2",
		-- "ng" before l/s -> "ŋ"
		["ng([ls])"] = "ŋ%1"
	},
	[2] = {
		-- drop "v" or "ɣ" directly after "u" (with optional non-syllabic and length marks)
		["(u" .. nonsyllabic .. "?" .. long .. "?)[vɣ]"] = "%1",
		-- every remaining "g" -> "k"
		["g"] = "k",
		-- "kʏð" -> "kvʏð", keeping an optional length mark on the vowel
		["kʏ(" .. long .. "?)ð"] = "kvʏ%1ð"
	},
	[3] = {
		-- "k" (optionally aspirated) before ɛ/i/ɪ -> "c"
		["k(ʰ?[ɛiɪ])"] = "c%1",
		-- "k" (optionally aspirated) before "ai" -> "c"
		["k(ʰ?ai)"] = "c%1",
		-- "kj" -> "c"
		["kj"] = "c",
		-- "jj" (optionally preceded by a length mark) -> "i" + non-syllabic mark,
		-- then the length mark if present, then "j"
		["(" .. long .. "?)jj"] = "i" .. nonsyllabic .. "%1j"
	},
	[4] = {
		-- "nk" -> "ŋk"
		["nk"] = "ŋk",
		-- "kc" -> "c" + length mark
		["kc"] = "c" .. long,
		-- any character immediately repeated -> that character + length mark
		["(.)%1"] = "%1" .. long
	}
}

-- function to determine vowel length
-- `v` is the IPA vowel and `next_chars` the (up to two) characters that
-- follow it in the spelling. Returns `v` with the length mark appended when
-- the vowel is long, otherwise `v` unchanged.
local function determineLength(v, next_chars)
	-- an x anywhere in the following characters counts as two consonants: short
	if find(next_chars, "x") then
		return v
	end

	-- long when at most one character follows, when a consonant is followed
	-- by something that is neither a consonant letter nor a hyphen, or before
	-- the clusters b/d/g/k/p/s/t + j/r/v
	local is_long = len(next_chars) <= 1
		or find(next_chars, consonant .. "[^" .. consonants .. "%-]")
		or find(next_chars, "[bdgkpst][jrv]")

	if is_long then
		return v .. long
	end

	-- short otherwise
	return v
end

-- function to determine vowel type
-- `v` is the vowel letter found at character position `pos` of `term`.
-- Picks one of the three variants stored in data.vowels[v] from the two
-- characters that follow; the regular variant also gets its length worked
-- out unless `stressed` is exactly false.
local function determineVowel(v, term, pos, stressed)
	local variants = data.vowels[v]
	-- the two characters after the vowel decide the variant
	local following = sub(term, pos + 1, pos + 2)

	-- before ng/nk
	if following == "ng" or following == "nk" then
		return variants[3]
	end

	-- before gi
	if following == "gi" then
		return variants[2]
	end

	-- unstressed: regular variant, no length
	if stressed == false then
		return variants[1]
	end

	-- stressed (FIXME!): regular variant with length determined
	return determineLength(variants[1], following)
end

-- function to count syllables
-- Scans `term` for successive matches of the `vowel` pattern (a run of vowel
-- symbols plus optional non-syllabic and length marks counts as one match).
-- Returns the number of matches and an array holding the character position
-- at which each match starts.
local function countSyllables(term)
	local poss = {}
	local init = 1

	-- record the start position of every vowel match; find is used rather
	-- than gmatch because gmatch yields the matched text, not its position
	while true do
		local first, last = find(term, vowel, init)
		if not first then
			break
		end
		poss[#poss + 1] = first
		init = last + 1
	end

	-- return syllable count and positions
	return #poss, poss
end

-- function to generate rhyme
-- Returns the part of `term` that starts at the vowel of the second-last
-- syllable, or at the vowel of the only syllable when there is just one.
-- Returns nil when `term` contains no vowel match at all.
local function getRhyme(term)
	local count, poss = countSyllables(term)

	-- nothing to rhyme on
	if count == 0 then
		return nil
	end

	-- mark start of rhyme
	local start
	if count == 1 then
		-- start at last syllable
		start = poss[1]
	else
		-- start at second-last syllable
		start = poss[count - 1]
	end

	-- return rhyme
	return sub(term, start)
end

-- function to generate transcription
-- Converts a spelling (or respelling) into an IPA string.
--
-- Parameters:
--   term     (string) the spelling; it is lower-cased and every y/ý is
--            replaced by i/í before processing. A hyphen starts a new
--            stressed part and inserts a secondary stress mark; hyphens and
--            full stops are removed from the result.
--   stressed (any) unless exactly false, a primary stress mark is put at the
--            start and the first vowel has its length determined.
--
-- Returns the transcription as a string (without surrounding slashes).
-- Raises an error when `term` is not a string.
--
-- NOTE(review): rules inside one numbered set are applied in pairs order,
-- which is unspecified -- confirm no rule relies on a sibling rule in the
-- same set having run first.
function export.toIPA(term, stressed)
	if type(term) ~= "string" then
		error('The function "toIPA" requires a string argument.')
	end

	-- initialise pronunciation
	term = lower(term)
	-- y is the same as i
	term = gsub(term, "y", "i")
	term = gsub(term, "ý", "í")
	local IPA = {}
	local pos = 1

	-- add initial stress (FIXME!)
	if stressed ~= false then
		table.insert(IPA, primary_stress)
	end

	-- handle initial letters
	if find(sub(term, 1, 1), consonant) then
		-- handle digraphs
		if data.initial[sub(term, 1, 2)] then
			table.insert(IPA, data.initial[sub(term, 1, 2)])
			pos = 3
		-- handle single consonants
		elseif data.initial[sub(term, 1, 1)] then
			table.insert(IPA, data.initial[sub(term, 1, 1)])
			pos = 2
		-- otherwise
		else
			table.insert(IPA, sub(term, 1, 1))
			pos = 2
		end
	end

	-- handle rest of string
	while pos <= len(term) do
		local char = sub(term, pos, pos)
		local pair = sub(term, pos, pos + 1)

		-- handle trigraphs
		if data.trigraphs[sub(term, pos, pos + 2)] then
			table.insert(IPA, data.trigraphs[sub(term, pos, pos + 2)])
			pos = pos + 3
		-- handle digraphs
		elseif data.digraphs[pair] then
			local sound = data.digraphs[pair]
			-- a digraph that begins with a vowel letter is a vowel digraph:
			-- it must take the length and use up the stress like a single
			-- vowel does, otherwise the next vowel is lengthened instead
			if data.vowels[char] then
				if stressed ~= false then
					sound = determineLength(sound, sub(term, pos + 2, pos + 3))
				end
				stressed = false
			end
			table.insert(IPA, sound)
			pos = pos + 2
		-- handle vowels
		elseif data.vowels[char] then
			table.insert(IPA, determineVowel(char, term, pos, stressed))
			-- only the first vowel of a stressed part gets its length determined
			stressed = false
			pos = pos + 1
		-- handle internal consonants
		elseif pos ~= 1 and data.internal[char] then
			table.insert(IPA, data.internal[char])
			pos = pos + 1
		-- handle compound stress
		elseif char == "-" then
			stressed = true
			table.insert(IPA, secondary_stress)
			pos = pos + 1
		-- otherwise
		else
			table.insert(IPA, char)
			pos = pos + 1
		end
	end

	-- combine ipa symbols into single string
	local pron = table.concat(IPA)

	-- apply phonemic rules
	for _, set_of_rules in ipairs(rules) do
		for regex, replacement in pairs(set_of_rules) do
			pron = gsub(pron, regex, replacement)
		end
	end

	-- remove any unwanted characters (e.g., hyphens, full stops)
	pron = gsub(pron, "[%-%.]", "")

	return pron
end

-- main export function
-- Template entry point. Reads two optional positional parameters from the
-- parent frame: the first is the term to transcribe (the current page title
-- is used when it is absent) and the second is passed to export.toIPA as its
-- `stressed` argument. Returns the transcription wrapped in slashes and
-- formatted by Module:IPA.
function export.show(frame)
	local param_spec = {
		[1] = {},
		[2] = {}
	}

	local page_title = mw.title.getCurrentTitle()
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, param_spec)

	-- NOTE(review): export.toIPA only tests `stressed ~= false`, so any value
	-- supplied as the second parameter is treated as stressed -- confirm
	local transcription = export.toIPA(args[1] or page_title.text, args[2])

	return m_ipa.format_IPA_full(lang, { { pron = "/" .. transcription .. "/" } })
end

return export