Module:is-pronunciation
- The following documentation is located at Module:is-pronunciation/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
Note: This module is unfinished and should NOT be used in entries.
This module generates IPA from Icelandic orthography, using the rules given at Icelandic orthography.
Testcases
4 of 50 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
þorn | ˈθɔrtn̥ | ˈθɔrtn̥ | |
himinn | ˈhɪːmɪnː | ˈhɪːmɪnː | |
brúnn | ˈprutn̥ | ˈprutn̥ | |
steinn | ˈstɛi̯tn̥ | ˈstɛi̯tn̥ | |
geimsteinn (respelled geim-steinn) | ˈcɛi̯mstɛi̯tn̥ | ˈcɛi̯mˌstɛi̯tn̥ | |
karl | ˈkʰartl̥ | ˈkʰartl̥ | |
rusl | ˈrʏstl̥ | ˈrʏstl̥ | |
bysna | ˈpɪstn̥a | ˈpɪstn̥a | |
ráps (respelled ráp.s) | ˈrau̯ːps | ˈrau̯ːps | |
taka | ˈtʰaːka | ˈtʰaːka | |
þökk | ˈθœhk | ˈθœhk | |
vopn | ˈvɔhpn̥ | ˈvɔhpn̥ | |
brotna | ˈprɔhtn̥a | ˈprɔhtn̥a | |
sakna | ˈsahkn̥a | ˈsahkn̥a | |
kembt | ˈcʰɛm̥t | ˈcʰɛm̥t | |
þið | ˈθɪːð | ˈθɪːð | |
guð | ˈkvʏːð | ˈkvʏːð | |
byggja | ˈpɪcːa | ˈpɪcːa | |
syngja | ˈsinca | ˈsinca | |
munkur | ˈmuŋkʏr | ˈmuŋkʏr | |
öngull | ˈœy̯ŋkʏtl̥ | ˈœy̯ŋkʏtl̥ | |
drengur | ˈtrɛi̯ŋkʏr | ˈtrɛi̯ŋkʏr | |
svangur | ˈsvau̯ŋkʏr | ˈsvau̯ŋkʏr | |
England | ˈɛi̯ŋlant | ˈɛi̯ŋlant | |
segja | ˈsɛi̯ːja | ˈsɛi̯ːja | |
fluga | ˈflʏːɣa | ˈflʏːɣa | |
fljúga | ˈfljuːa | ˈfljuːa | |
bógur | ˈpou̯ːʏr | ˈpou̯ːʏr | |
lágur | ˈlau̯ːʏr | ˈlau̯ːʏr | |
prófa | ˈpʰrou̯ːa | ˈpʰrou̯ːa | |
dags | ˈtaxs | ˈtaxs | |
dragt | ˈtraxt | ˈtraxt | |
guðspjall (respelled guð-spjall) | ˈkvʏðspjatl̥ | ˈkvʏðˌspjatl̥ | |
september | ˈsɛftɛmpɛr | ˈsɛftɛmpɛr | |
október | ˈɔxtou̯pɛr | ˈɔxtou̯pɛr | |
gjalda | ˈcalta | ˈcalta | |
geta | ˈcɛːta | ˈcɛːta | |
kjósa | ˈcʰou̯ːsa | ˈcʰou̯ːsa | |
keyra | ˈcʰɛi̯ːra | ˈcʰɛi̯raː | |
kirkja | ˈcʰɪrca | ˈcʰɪrca | |
hlýr | ˈl̥iːr | ˈl̥iːr | |
hratt | ˈr̥aht | ˈr̥aht | |
spara | ˈspaːra | ˈspaːra | |
þykja | ˈθɪːca | ˈθɪːca | |
lofa | ˈlɔːva | ˈlɔːva | |
rós | ˈrou̯ːs | ˈrou̯ːs | |
vaxa | ˈvaxsa | ˈvaxsa | |
mylla, special=true | ˈmɪlːa | ˈmɪtl̥a | |
nudda | ˈnʏtːa | ˈnʏtːa | |
kaþólikki | ˈkʰaːθou̯lɪhcɪ | ˈkʰaːθou̯lɪhcɪ |
local export = {}
local lang = require("Module:languages").getByCode("is")
local sc = require("Module:scripts").getByCode("Latn")
local m_ipa = require("Module:IPA")
--- Tag a piece of text using this module's language and script objects.
-- Thin wrapper around Module:script utilities' tag_text.
-- @param text the text to tag
-- @param face forwarded unchanged as the fourth argument
-- @return whatever Module:script utilities' tag_text returns
function export.tag_text(text, face)
	local script_utilities = require("Module:script utilities")
	return script_utilities.tag_text(text, lang, sc, face)
end
--- Build a link to a term using this module's language and script objects.
-- Thin wrapper around Module:links' full_link.
-- @param term the term to link to
-- @param face forwarded unchanged as the second argument
-- @return whatever Module:links' full_link returns
function export.link(term, face)
	local link_data = { term = term, lang = lang, sc = sc }
	return require("Module:links").full_link(link_data, face)
end
local sub = mw.ustring.sub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local gsub = mw.ustring.gsub
local len = mw.ustring.len
local lower = mw.ustring.lower
local U = require("Module:string/char")
-- IPA marks used while assembling transcriptions
local nonsyllabic = U(0x32F) -- inverted breve below
local voiceless = U(0x325) -- combining ring below
local long = U(0x2D0) -- triangular colon
local primary_stress, secondary_stress = "ˈ", "ˌ"
-- character inventories, and the pattern fragments built from them
local consonants = "bdðfghjklmnprstvxþ"
local vowels = "aɛɪiʏyœɔou"
-- one consonant character
local consonant = ("[%s]"):format(consonants)
-- a run of vowel characters, optionally followed by the non-syllabic mark and the length mark
local vowel = ("[%s]+%s?%s?"):format(vowels, nonsyllabic, long)
-- either stress mark
local stress = ("[%s%s]"):format(primary_stress, secondary_stress)
-- pronunciation data: lookup tables from spelling to IPA, grouped by the
-- context in which toIPA consults them
local data
do
	-- helpers so the combining diacritics read as words
	local function glide(ipa)
		return ipa .. nonsyllabic
	end
	local function devoiced(ipa)
		return ipa .. voiceless
	end

	data = {
		-- word-initial consonants (one or two letters)
		initial = {
			b = "p",
			d = "t",
			g = "k",
			p = "pʰ",
			t = "tʰ",
			k = "kʰ",
			f = "f",
			gj = "c",
			kj = "cʰ",
			hv = "kv",
			["þ"] = "θ",
			hl = devoiced("l"),
			hn = devoiced("n"),
			hr = devoiced("r"),
			hj = "ç",
		},
		-- three-letter sequences, tried first at every non-initial position
		trigraphs = {
			fnd = "mt",
			fnt = devoiced("m") .. "t",
			mbd = "mt",
			mbg = "mk",
			mbs = "ms",
			mbt = devoiced("m") .. "t",
		},
		-- two-letter sequences, tried after trigraphs
		digraphs = {
			kj = "c",
			ll = devoiced("tl"),
			rl = devoiced("rtl"),
			rn = devoiced("rtn"),
			sl = devoiced("stl"),
			sn = devoiced("stn"),
			au = glide("œy"),
			ei = glide("ɛi"),
			ey = glide("ɛi"),
		},
		-- internal and final consonants
		internal = {
			b = "p",
			d = "t",
			x = "xs",
			f = "v",
			["þ"] = "θ",
		},
		-- vowels, each as { regular, before gi, before ng/nk }
		vowels = {
			a = { "a", glide("ai"), glide("au") },
			["á"] = { glide("au"), glide("au"), glide("au") },
			e = { "ɛ", glide("ei"), glide("ɛi") },
			["é"] = { "jɛ", "jɛ", "jɛ" },
			i = { "ɪ", "i", "i" },
			["í"] = { "i", "i", "i" },
			o = { "ɔ", glide("ɔi"), glide("ɔi") },
			["ó"] = { glide("ou"), glide("ou"), glide("ou") },
			u = { "ʏ", glide("ʏi"), "u" },
			["ú"] = { "u", "u", "u" },
			["æ"] = { glide("ai"), glide("ai"), glide("ai") },
			["ö"] = { "œ", glide("œy"), glide("œy") },
		},
	}
end
-- add data for preaspirated stop clusters: a doubled p/t/k becomes h + stop,
-- and p/t/k followed by l/m/n becomes h + stop + voiceless sonorant
for _, stop in ipairs({ "p", "t", "k" }) do
	data.digraphs[stop .. stop] = "h" .. stop
	for _, sonorant in ipairs({ "l", "m", "n" }) do
		data.digraphs[stop .. sonorant] = "h" .. stop .. sonorant .. voiceless
	end
end
-- list pronunciation substitutions
-- Each numbered set maps a pattern to its replacement; toIPA runs every pair
-- through gsub on the assembled transcription, so the patterns see IPA
-- symbols (plus any "-" and "." still present), not the original spelling.
-- The sets are applied in numeric order (1, 2, 3, 4).
-- NOTE: inside one set the pairs are visited with pairs(), whose order is not
-- defined by Lua. Rules that must fire in a fixed sequence belong in different
-- sets, and reordering or reformatting the entries below may change results.
local rules = {
-- set 1: "nn" after the first vowel that follows a stress mark (only
-- characters from `consonants` in between) becomes t + voiceless n;
-- "g" after a vowel becomes ɣ before a/ʏ/ð/l/r and j before j/i
[1] = {
["(" .. stress .. consonant .. "*" .. vowel .. ")nn"] = "%1tn" .. voiceless,
["(" .. vowel .. ")" .. "g" .. "([aʏðlr])"] = "%1ɣ%2",
["(" .. vowel .. ")" .. "g" .. "([ji])"] = "%1j%2"
},
-- set 2 (skipped by toIPA when `special` is truthy): k/g before t/s after a
-- vowel becomes x; p before t/s/k after a vowel becomes f; ng before l/s
-- becomes ŋ; v/ɣ is dropped after u; any "g" becomes k; kʏð becomes kvʏð
[2] = { -- set 2 only applies to native words
["(" .. vowel .. ")" .. "[kg]" .. "([ts])"] = "%1x%2",
["(" .. vowel .. ")" .. "p" .. "([tsk])"] = "%1f%2",
["ng([ls])"] = "ŋ%1",
["(u" .. nonsyllabic .. "?" .. long .. "?)[vɣ]"] = "%1",
["g"] = "k",
["kʏ(" .. long .. "?)ð"] = "kvʏ%1ð"
},
-- set 3: k (optionally aspirated) becomes c before ɛ, i, ɪ or ai; kj becomes
-- c; jj becomes non-syllabic i + j, keeping a preceding length mark between
-- the two
[3] = {
["k(ʰ?[ɛiɪ])"] = "c%1",
["k(ʰ?ai)"] = "c%1",
["kj"] = "c",
["(" .. long .. "?)jj"] = "i" .. nonsyllabic .. "%1j"
},
-- set 4: nk becomes ŋk; kc becomes long c; any character immediately
-- repeated is written once followed by the length mark
[4] = {
["nk"] = "ŋk",
["kc"] = "c" .. long,
["(.)%1"] = "%1" .. long
}
}
--- Decide whether a vowel is long and append the length mark if so.
-- @param v IPA vowel already chosen for the current position
-- @param next_chars the characters that follow the vowel in the spelling
--   (callers in this file pass at most two)
-- @return `v .. long` for a long vowel, otherwise `v` unchanged
local function determineLength(v, next_chars)
	-- x is treated like two consonants, so the vowel before it stays short
	if find(next_chars, "x") then
		return v
	end
	-- nothing, or only one character, is left after the vowel
	if len(next_chars) <= 1 then
		return v .. long
	end
	-- a single consonant followed by something that is neither a consonant
	-- nor a hyphen
	if find(next_chars, consonant .. "[^" .. consonants .. "%-]") then
		return v .. long
	end
	-- the clusters b/d/g/k/p/s/t + j/r/v
	if find(next_chars, "[bdgkpst][jrv]") then
		return v .. long
	end
	-- everything else is short
	return v
end
--- Pick the IPA value for the vowel letter at `pos`.
-- @param v the vowel letter (a key of data.vowels)
-- @param term the whole lower-cased spelling
-- @param pos position of `v` within `term`
-- @param accent `false` suppresses length marking; any other value enables it
-- @return the IPA vowel, with the length mark where determineLength adds one
local function determineVowel(v, term, pos, accent)
	-- { regular, before gi, before ng/nk }
	local variants = data.vowels[v]
	-- the two characters after the vowel decide which variant applies
	local following = sub(term, pos + 1, pos + 2)
	if following == "ng" or following == "nk" then
		return variants[3]
	end
	if following == "gi" then
		return variants[2]
	end
	-- unstressed: regular quality, never lengthened
	if accent == false then
		return variants[1]
	end
	-- stressed: regular quality, length decided by the context (FIXME!)
	return determineLength(variants[1], following)
end
--- Count the syllables of a transcription by counting matches of `vowel`.
-- @param term string to scan
-- @return the number of matches, and a list of the matched substrings
--   (the matched text itself, not its position)
local function countSyllables(term)
	local nuclei = {}
	for nucleus in gmatch(term, vowel) do
		nuclei[#nuclei + 1] = nucleus
	end
	return #nuclei, nuclei
end
--- Extract the rhyme from a transcription.
-- The rhyme starts at the vowel of the second-last syllable, or at the only
-- vowel when there is just one, and runs to the end of the string.
-- The previous version indexed the string with a matched substring
-- (`term[poss[1]]`), which yields nil and made every call raise an error.
-- @param term transcription to scan for matches of `vowel`
-- @return the rhyme, or "" when `term` contains no vowel
local function getRhyme(term)
	-- collect the start position of every vowel match, left to right
	local starts = {}
	local init = 1
	while true do
		local first, last = find(term, vowel, init)
		if not first then
			break
		end
		starts[#starts + 1] = first
		-- `vowel` never matches the empty string, so this always advances
		init = last + 1
	end
	local count = #starts
	-- no vowel means no rhyme
	if count == 0 then
		return ""
	end
	-- second-last syllable, falling back to the only one
	local start = starts[math.max(count - 1, 1)]
	return sub(term, start)
end
--- Generate a transcription from Icelandic spelling.
-- The spelling is converted letter group by letter group using `data`, then
-- the substitution sets in `rules` are applied to the joined result.
-- @param term spelling to convert (string). A "-" marks a compound boundary:
--   it inserts the secondary stress mark and re-enables length marking for
--   the next vowel. "-" and "." are removed from the final result.
-- @param accent pass `false` to omit the primary stress mark and the length
--   marking of the first vowel; any other value (including nil) enables both
-- @param special truthy for words that do not follow the native-only rules:
--   rule set 2 is skipped and "ll" is read as a long l instead of tl̥
-- @return the transcription as a string, without surrounding slashes
-- Raises an error when `term` is not a string.
function export.toIPA(term, accent, special)
	if type(term) ~= "string" then
		error('The function "toIPA" requires a string argument.')
	end
	-- initialise pronunciation
	term = lower(term)
	-- y is the same as i (both replacements go through mw.ustring)
	term = gsub(term, "y", "i")
	term = gsub(term, "ý", "í")
	local IPA = {}
	local pos = 1
	-- mark stress (FIXME!)
	if accent ~= false then
		table.insert(IPA, primary_stress)
	end
	-- handle initial letters
	local first = sub(term, 1, 1)
	if find(first, consonant) then
		local first_two = sub(term, 1, 2)
		if data.initial[first_two] then
			-- initial digraph
			table.insert(IPA, data.initial[first_two])
			pos = 3
		elseif data.initial[first] then
			-- initial single consonant with a special value
			table.insert(IPA, data.initial[first])
			pos = 2
		else
			-- any other consonant is copied as is
			table.insert(IPA, first)
			pos = 2
		end
	end
	-- handle rest of string
	local length = len(term)
	while pos <= length do
		local one = sub(term, pos, pos)
		local two = sub(term, pos, pos + 1)
		local three = sub(term, pos, pos + 2)
		if data.trigraphs[three] then
			-- handle trigraphs
			table.insert(IPA, data.trigraphs[three])
			pos = pos + 3
		elseif data.digraphs[two] and not (special and two == "ll") then
			-- handle digraphs; "ll" in special words is left to the final
			-- doubling rule so that it comes out as a long l
			local sound = data.digraphs[two]
			if data.vowels[one] then
				-- vowel digraph (au, ei, ey): it is a syllable nucleus like
				-- any single vowel, so it takes the length mark when stressed
				-- and uses up the stress; otherwise the NEXT vowel would be
				-- lengthened instead
				if accent ~= false then
					sound = determineLength(sound, sub(term, pos + 2, pos + 3))
				end
				accent = false
			end
			table.insert(IPA, sound)
			pos = pos + 2
		elseif data.vowels[one] then
			-- handle vowels; only the first vowel after a stress mark is
			-- considered for lengthening
			table.insert(IPA, determineVowel(one, term, pos, accent))
			accent = false
			pos = pos + 1
		elseif pos ~= 1 and data.internal[one] then
			-- handle internal consonants
			table.insert(IPA, data.internal[one])
			pos = pos + 1
		elseif one == "-" then
			-- handle compound stress
			accent = true
			table.insert(IPA, secondary_stress)
			pos = pos + 1
		else
			-- otherwise copy the character unchanged
			table.insert(IPA, one)
			pos = pos + 1
		end
	end
	-- combine ipa symbols into single string
	local pron = table.concat(IPA)
	-- apply phonemic rules, set by set; set 2 only applies to native words
	for _, set_of_rules in ipairs(rules) do
		if not (special and set_of_rules == rules[2]) then
			for regex, replacement in pairs(set_of_rules) do
				pron = gsub(pron, regex, replacement)
			end
		end
	end
	-- remove any unwanted characters (e.g., hyphens, full stops)
	pron = gsub(pron, "[%-%.]", "")
	return pron
end
--- Main export function: render the pronunciation for use in a template.
-- Reads the arguments of the parent frame:
--   1 = spelling or respelling to transcribe (defaults to the page title)
--   2 = accent, forwarded to toIPA as received
--   3 = special (boolean), forwarded to toIPA
-- @param frame the Scribunto frame object of the #invoke call
-- @return the result of Module:IPA's format_IPA_full for the transcription
--   wrapped in slashes
function export.show(frame)
	local params = {
		[1] = {},
		[2] = {},
		-- parameter 3 was read below but never declared, so Module:parameters
		-- never handed it over and `special` could not be set from a template
		[3] = { type = "boolean" }
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local term = args[1] or mw.title.getCurrentTitle().text
	-- NOTE(review): parameter 2 has no declared type, so it presumably arrives
	-- as a string or nil; toIPA only switches stress off for the boolean
	-- `false`, which this path then never produces - confirm intended usage
	local accent = args[2]
	local special = args[3]
	local ipa = "/" .. export.toIPA(term, accent, special) .. "/"
	return m_ipa.format_IPA_full { lang = lang, items = {{ pron = ipa }} }
end
-- hand the table of public functions (tag_text, link, toIPA, show) to require/#invoke
return export