Module:is-pronunciation

The following documentation is located at Module:is-pronunciation/documentation. Categories were auto-generated by Module:module categorization.

Useful links: subpage list • links • transclusions • testcases • sandbox

Note: This module is unfinished and should NOT be used in entries.

This module generates IPA from Icelandic orthography, using the rules given at Icelandic orthography.

Testcases

4 of 50 tests failed. (refresh)

test_pron:
Text	Expected	Actual
þorn	ˈθɔrtn̥	ˈθɔrtn̥
himinn	ˈhɪːmɪnː	ˈhɪːmɪnː
brúnn	ˈprutn̥	ˈprutn̥
steinn	ˈstɛi̯tn̥	ˈstɛi̯tn̥
geimsteinn (respelled geim-steinn)	ˈcɛi̯mstɛi̯tn̥	ˈcɛi̯mˌstɛi̯tn̥
karl	ˈkʰartl̥	ˈkʰartl̥
rusl	ˈrʏstl̥	ˈrʏstl̥
bysna	ˈpɪstn̥a	ˈpɪstn̥a
ráps (respelled ráp.s)	ˈrau̯ːps	ˈrau̯ːps
taka	ˈtʰaːka	ˈtʰaːka
þökk	ˈθœhk	ˈθœhk
vopn	ˈvɔhpn̥	ˈvɔhpn̥
brotna	ˈprɔhtn̥a	ˈprɔhtn̥a
sakna	ˈsahkn̥a	ˈsahkn̥a
kembt	ˈcʰɛm̥t	ˈcʰɛm̥t
þið	ˈθɪːð	ˈθɪːð
guð	ˈkvʏːð	ˈkvʏːð
byggja	ˈpɪcːa	ˈpɪcːa
syngja	ˈsinca	ˈsinca
munkur	ˈmuŋkʏr	ˈmuŋkʏr
öngull	ˈœy̯ŋkʏtl̥	ˈœy̯ŋkʏtl̥
drengur	ˈtrɛi̯ŋkʏr	ˈtrɛi̯ŋkʏr
svangur	ˈsvau̯ŋkʏr	ˈsvau̯ŋkʏr
England	ˈɛi̯ŋlant	ˈɛi̯ŋlant
segja	ˈsɛi̯ːja	ˈsɛi̯ːja
fluga	ˈflʏːɣa	ˈflʏːɣa
fljúga	ˈfljuːa	ˈfljuːa
bógur	ˈpou̯ːʏr	ˈpou̯ːʏr
lágur	ˈlau̯ːʏr	ˈlau̯ːʏr
prófa	ˈpʰrou̯ːa	ˈpʰrou̯ːa
dags	ˈtaxs	ˈtaxs
dragt	ˈtraxt	ˈtraxt
guðspjall (respelled guð-spjall)	ˈkvʏðspjatl̥	ˈkvʏðˌspjatl̥
september	ˈsɛftɛmpɛr	ˈsɛftɛmpɛr
október	ˈɔxtou̯pɛr	ˈɔxtou̯pɛr
gjalda	ˈcalta	ˈcalta
geta	ˈcɛːta	ˈcɛːta
kjósa	ˈcʰou̯ːsa	ˈcʰou̯ːsa
keyra	ˈcʰɛi̯ːra	ˈcʰɛi̯raː
kirkja	ˈcʰɪrca	ˈcʰɪrca
hlýr	ˈl̥iːr	ˈl̥iːr
hratt	ˈr̥aht	ˈr̥aht
spara	ˈspaːra	ˈspaːra
þykja	ˈθɪːca	ˈθɪːca
lofa	ˈlɔːva	ˈlɔːva
rós	ˈrou̯ːs	ˈrou̯ːs
vaxa	ˈvaxsa	ˈvaxsa
mylla, special=true	ˈmɪlːa	ˈmɪtl̥a
nudda	ˈnʏtːa	ˈnʏtːa
kaþólikki	ˈkʰaːθou̯lɪhcɪ	ˈkʰaːθou̯lɪhcɪ

-- Table of functions exposed by this module; returned at the end of the file.
local export = {}

-- Language and script objects looked up by code ("is" and "Latn"). They are
-- passed to the tagging, linking and IPA-formatting helpers used below.
local lang = require("Module:languages").getByCode("is")
local sc = require("Module:scripts").getByCode("Latn")
-- IPA module; its format_IPA_full function is called by export.show.
local m_ipa = require("Module:IPA")

-- Tag `text` via Module:script utilities, supplying this module's language
-- and script objects. `face` is forwarded unchanged.
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end

-- Build a link to `term` via Module:links, supplying this module's language
-- and script objects. `face` is forwarded unchanged.
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end

-- Local aliases for the mw.ustring string functions used throughout
local ustring = mw.ustring
local sub, find, gmatch = ustring.sub, ustring.find, ustring.gmatch
local gsub, len, lower = ustring.gsub, ustring.len, ustring.lower

-- Diacritics and marks used when building transcriptions
local U = require("Module:string/char")
local nonsyllabic = U(0x32F) -- inverted breve below
local voiceless = U(0x325) -- combining ring below
local long = U(0x2D0) -- triangular colon
local primary_stress = "ˈ"
local secondary_stress = "ˌ"

-- Pattern fragments: a single consonant letter, a vowel nucleus (one or more
-- vowel symbols, optionally followed by the non-syllabic mark and the length
-- mark), and either stress mark
local consonants = "bdðfghjklmnprstvxþ"
local consonant = ("[%s]"):format(consonants)

local vowels = "aɛɪiʏyœɔou"
local vowel = ("[%s]+%s?%s?"):format(vowels, nonsyllabic, long)

local stress = ("[%s%s]"):format(primary_stress, secondary_stress)

-- Pronunciation data: lookup tables mapping spelling to IPA.
--   initial   : consonants (and two-letter sequences) at the start of the term
--   trigraphs : three-letter sequences
--   digraphs  : two-letter sequences
--   internal  : single consonants that are not term-initial
--   vowels    : per vowel letter, three variants in the order
--               { regular, before "gi", before "ng"/"nk" }
local data
do
	-- append the non-syllabic mark (second element of a diphthong)
	local function glide(s)
		return s .. nonsyllabic
	end

	-- append the voiceless ring
	local function devoiced(s)
		return s .. voiceless
	end

	-- a vowel whose three variants are all the same
	local function uniform(s)
		return { s, s, s }
	end

	data = {
		initial = {
			b = "p", d = "t", g = "k",
			p = "pʰ", t = "tʰ", k = "kʰ",
			f = "f",
			gj = "c", kj = "cʰ",
			hv = "kv",
			["þ"] = "θ",
			hl = devoiced("l"), hn = devoiced("n"), hr = devoiced("r"),
			hj = "ç",
		},
		trigraphs = {
			fnd = "mt",
			fnt = devoiced("m") .. "t",
			mbd = "mt",
			mbg = "mk",
			mbs = "ms",
			mbt = devoiced("m") .. "t",
		},
		digraphs = {
			kj = "c",
			ll = devoiced("tl"),
			rl = devoiced("rtl"),
			rn = devoiced("rtn"),
			sl = devoiced("stl"),
			sn = devoiced("stn"),
			au = glide("œy"),
			ei = glide("ɛi"),
			ey = glide("ɛi"),
		},
		internal = {
			b = "p",
			d = "t",
			x = "xs",
			f = "v",
			["þ"] = "θ",
		},
		vowels = {
			["a"] = { "a", glide("ai"), glide("au") },
			["á"] = uniform(glide("au")),
			["e"] = { "ɛ", glide("ei"), glide("ɛi") },
			["é"] = uniform("jɛ"),
			["i"] = { "ɪ", "i", "i" },
			["í"] = uniform("i"),
			["o"] = { "ɔ", glide("ɔi"), glide("ɔi") },
			["ó"] = uniform(glide("ou")),
			["u"] = { "ʏ", glide("ʏi"), "u" },
			["ú"] = uniform("u"),
			["æ"] = uniform(glide("ai")),
			["ö"] = { "œ", glide("œy"), glide("œy") },
		},
	}
end

-- Register the preaspirated stop clusters as digraphs: a doubled p/t/k maps to
-- "h" + stop, and p/t/k followed by l/m/n maps to "h" + stop + the sonorant
-- carrying the voiceless ring.
for _, stop in ipairs({ "p", "t", "k" }) do
	data.digraphs[stop .. stop] = "h" .. stop
	for _, sonorant in ipairs({ "l", "m", "n" }) do
		data.digraphs[stop .. sonorant] = "h" .. stop .. sonorant .. voiceless
	end
end

-- Pattern substitutions applied to the assembled transcription. The four sets
-- are applied in numeric order; each set maps a ustring pattern to its
-- replacement.
-- NOTE(review): within a set the patterns are table keys, so the order in
-- which they are visited by pairs() is unspecified. Several patterns in set 2
-- look order-sensitive (e.g. "g" versus "ng([ls])") -- confirm whether an
-- explicit ordering is needed.
local rules
do
	local captured_vowel = "(" .. vowel .. ")"
	local optional_long = long .. "?"

	rules = {
		-- set 1
		{
			["(" .. stress .. consonant .. "*" .. vowel .. ")nn"] = "%1tn" .. voiceless,
			[captured_vowel .. "g([aʏðlr])"] = "%1ɣ%2",
			[captured_vowel .. "g([ji])"] = "%1j%2",
		},
		-- set 2: only applies to native words
		{
			[captured_vowel .. "[kg]([ts])"] = "%1x%2",
			[captured_vowel .. "p([tsk])"] = "%1f%2",
			["ng([ls])"] = "ŋ%1",
			["(u" .. nonsyllabic .. "?" .. optional_long .. ")[vɣ]"] = "%1",
			["g"] = "k",
			["kʏ(" .. optional_long .. ")ð"] = "kvʏ%1ð",
		},
		-- set 3
		{
			["k(ʰ?[ɛiɪ])"] = "c%1",
			["k(ʰ?ai)"] = "c%1",
			["kj"] = "c",
			["(" .. optional_long .. ")jj"] = "i" .. nonsyllabic .. "%1j",
		},
		-- set 4
		{
			["nk"] = "ŋk",
			["kc"] = "c" .. long,
			["(.)%1"] = "%1" .. long,
		},
	}
end

-- Decide whether a vowel is long.
--   v          : the vowel's IPA string
--   next_chars : the (up to two) orthographic characters following the vowel
-- Returns `v` with the length mark appended when long, otherwise `v` as is.
local function determineLength(v, next_chars)
	-- x counts as two consonants, so the vowel before it stays short
	if find(next_chars, "x") then
		return v
	end

	-- long when at most one character follows, when a single consonant is
	-- followed by something that is neither a consonant nor a hyphen, or
	-- before the clusters b/d/g/k/p/s/t + j/r/v
	local is_long = len(next_chars) <= 1
		or find(next_chars, consonant .. "[^" ..  consonants .. "%-]")
		or find(next_chars, "[bdgkpst][jrv]")

	if is_long then
		return v .. long
	end

	-- short in every other environment
	return v
end

-- Pick the IPA realisation of the vowel letter `v` found at index `pos` of
-- `term`. The two following characters select the variant; length is only
-- considered when `accent` is not false (FIXME in the original: stress
-- handling is provisional).
local function determineVowel(v, term, pos, accent)
	local variants = data.vowels[v]
	local following = sub(term, pos + 1, pos + 2)

	-- before ng/nk
	if following == "ng" or following == "nk" then
		return variants[3]
	end

	-- before gi
	if following == "gi" then
		return variants[2]
	end

	-- unstressed: plain regular variant
	if accent == false then
		return variants[1]
	end

	-- stressed: regular variant, possibly lengthened
	return determineLength(variants[1], following)
end

-- Count syllables by counting vowel nuclei in `term`.
-- Returns two values: the number of nuclei, and a list of the matched nucleus
-- strings in order of occurrence (the matched text, not its position).
local function countSyllables(term)
	local nuclei = {}

	for nucleus in gmatch(term, vowel) do
		nuclei[#nuclei + 1] = nucleus
	end

	return #nuclei, nuclei
end

-- Generate the rhyme of `term`: the part of the string starting at the vowel
-- nucleus of the last syllable (one-syllable terms) or of the second-last
-- syllable (longer terms).
-- `term` is matched against the module's vowel pattern, so it is presumably an
-- IPA transcription rather than a spelling -- confirm against callers.
-- Returns the rhyme as a string, or nil when `term` contains no vowel.
--
-- The previous version tried to index the string with the matched vowel text
-- (term[...]) and passed a string as the start index to sub(), so it raised
-- an error for every input. Nucleus positions are now found with find().
local function getRhyme(term)
	-- collect the start index of every vowel nucleus
	local starts = {}
	local init = 1
	while true do
		local first, last = find(term, vowel, init)
		if not first then
			break
		end
		starts[#starts + 1] = first
		init = last + 1
	end

	local count = #starts
	if count == 0 then
		-- no vowel, hence no rhyme
		return nil
	end

	-- start at the only syllable, or at the second-last one
	local start = count == 1 and starts[1] or starts[count - 1]

	return sub(term, start)
end

-- Digraphs that spell a vowel nucleus. They must take part in vowel-length
-- assignment and consume the stress flag just like single vowel letters.
local vowel_digraphs = { au = true, ei = true, ey = true }

-- Generate a transcription from Icelandic spelling.
--   term    : string; the spelling. A hyphen marks a compound boundary (a
--             secondary stress mark is inserted and the next vowel is treated
--             as stressed); hyphens and full stops are stripped from the
--             result.
--   accent  : pass false to suppress the initial primary stress mark and
--             vowel lengthening; any other value (including nil) enables them.
--   special : when truthy, rule set 2 (native words only) is skipped and "ll"
--             is kept as a plain long l instead of the native "tl̥" outcome.
-- Returns the transcription as a string (without slashes).
-- Raises an error if `term` is not a string.
function export.toIPA(term, accent, special)
	if type(term) ~= "string" then
		error('The function "toIPA" requires a string argument.')
	end

	-- initialise pronunciation
	term = lower(term)
	-- y is the same as i (the inner call is truncated to its first result)
	term = gsub(gsub(term, "y", "i"), "ý", "í")
	local term_length = len(term)
	local IPA = {}
	local pos = 1

	-- mark stress (FIXME!)
	if accent ~= false then
		table.insert(IPA, primary_stress)
	end

	-- handle initial letters
	local first = sub(term, 1, 1)
	if find(first, consonant) then
		local first_two = sub(term, 1, 2)
		if data.initial[first_two] then
			-- initial digraph
			table.insert(IPA, data.initial[first_two])
			pos = 3
		else
			-- single consonant, mapped if listed, otherwise copied
			table.insert(IPA, data.initial[first] or first)
			pos = 2
		end
	end

	-- handle rest of string
	while pos <= term_length do
		local one = sub(term, pos, pos)
		local two = sub(term, pos, pos + 1)
		local three = sub(term, pos, pos + 2)

		if data.trigraphs[three] then
			-- trigraphs
			table.insert(IPA, data.trigraphs[three])
			pos = pos + 3
		elseif data.digraphs[two] and not (special and two == "ll") then
			-- digraphs
			local symbol = data.digraphs[two]
			if vowel_digraphs[two] then
				-- A diphthong spelled with two letters is the syllable
				-- nucleus: lengthen it when stressed and clear the stress
				-- flag, so that the following vowel is not lengthened in its
				-- place (e.g. keyra).
				if accent ~= false then
					symbol = determineLength(symbol, sub(term, pos + 2, pos + 3))
				end
				accent = false
			end
			table.insert(IPA, symbol)
			pos = pos + 2
		elseif data.vowels[one] then
			-- single vowels
			table.insert(IPA, determineVowel(one, term, pos, accent))
			accent = false
			pos = pos + 1
		elseif pos ~= 1 and data.internal[one] then
			-- internal consonants
			table.insert(IPA, data.internal[one])
			pos = pos + 1
		elseif one == "-" then
			-- compound boundary: secondary stress, next vowel is stressed
			accent = true
			table.insert(IPA, secondary_stress)
			pos = pos + 1
		else
			-- anything else is copied through unchanged
			table.insert(IPA, one)
			pos = pos + 1
		end
	end

	-- combine ipa symbols into single string
	local pron = table.concat(IPA)

	-- apply phonemic rules; set 2 is reserved for native words and is
	-- therefore skipped when `special` is set
	for index, set_of_rules in ipairs(rules) do
		if not (special and index == 2) then
			for regex, replacement in pairs(set_of_rules) do
				pron = gsub(pron, regex, replacement)
			end
		end
	end

	-- remove any unwanted characters (e.g., hyphens, full stops)
	pron = gsub(pron, "[%-%.]", "")

	return pron
end

-- Main entry point, called from a template via #invoke.
-- Template parameters (read from the parent frame):
--   1 : the term to transcribe; defaults to the current page title
--   2 : passed to toIPA as `accent`
--   3 : passed to toIPA as `special` (boolean)
-- Returns the transcription wrapped in slashes and formatted by Module:IPA.
function export.show(frame)
	-- Parameter 3 was read below but never declared here, so `special` could
	-- not be passed through the template; it is now declared as a boolean.
	-- NOTE(review): parameter 2 arrives as a string, so toIPA's
	-- `accent ~= false` check can never be false from here -- confirm the
	-- intended way to switch stress off from a template.
	local params = {
		[1] = {},
		[2] = {},
		[3] = { type = "boolean" },
	}

	local title = mw.title.getCurrentTitle()

	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1] or title.text
	local accent = args[2]
	local special = args[3]

	local ipa = export.toIPA(term, accent, special)
	ipa = "/" .. ipa .. "/"

	return m_ipa.format_IPA_full { lang = lang, items = {{ pron = ipa }} }
end

return export