Module:User:AryamanA/hi-IPA
- This module lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of AryamanA, for his own experimentation. Items in this module may be added and removed at AryamanA's discretion; do not rely on this module's stability.
local export = {}
local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find
-- Transliteration character -> IPA string.
-- export.toIPA passes this table to gsub with the pattern ".", so every single
-- character of the transliteration that has a key here is replaced by its value;
-- characters without a key are left as they are.
-- The loop over `identical` further down adds identity mappings to this table
-- (and, because "l" is listed there, overwrites the ["l"] entry given here).
-- NOTE(review): keys such as "ā̃" and "ī̃" appear to consist of more than one
-- code point, so a one-character lookup may never reach them - confirm.
local correspondences = {
["ṅ"] = "ŋ", ["g"] = "ɡ",
["c"] = "t͡ʃ", ["j"] = "d͡ʒ", ["ñ"] = "ɲ",
["ṭ"] = "ʈ", ["ḍ"] = "ɖ", ["ṇ"] = "ɳ",
["t"] = "t̪", ["d"] = "d̪",
["y"] = "j", ["r"] = "ɾ", ["v"] = "ʋ", ["l"] = "l̪",
["ś"] = "ʃ", ["ṣ"] = "ʂ", ["h"] = "ɦ",
["ṛ"] = "ɽ", ["ž"] = "ʒ", ["ḻ"] = "ɭ", ["ġ"] = "ɣ", ["q"] = "q", ["x"] = "x", ["ṉ"] = "n", ["ṟ"] = "ɹ",
["a"] = "ə", ["ā"] = "ɑː", ["i"] = "ɪ",
["ī"] = "iː", ["o"] = "oː", ["e"] = "eː",
["u"] = "ʊ", ["ū"] = "uː", ["ŏ"] = "ɔ", ["ĕ"] = "æ",
["ẽ"] = "ẽː", ["ũ"] = "ʊ̃", ["õ"] = "õː", ["ã"] = "ə̃", ["ā̃"] = "ɑ̃ː", ["ĩ"] = "ɪ̃", ["ī̃"] = "ĩː",
["ॐ"] = "oːm", ["ḥ"] = "ʰ", ["'"] = "(ʔ)",
}
-- Replacements applied by export.toIPA only when style == "standard":
-- each character matched by "[xġqžzf']" is swapped for the value listed here
-- before any further processing.
local perso_arabic = {
["x"] = "kh", ["ġ"] = "g", ["q"] = "k", ["ž"] = "z", ["z"] = "j", ["f"] = "ph", ["'"] = "",
}
-- Short vowel -> long vowel. export.toIPA indexes this table with the "u" or
-- "i" captured from an "a" + [ui] + [yw] sequence.
local lengthen = {
["a"] = "ā", ["i"] = "ī", ["u"] = "ū",
}
-- Characters treated as vowel material. Spliced into character classes by
-- syllabify_pattern below and by export.stress; note that the length mark "ː"
-- is part of this list.
local vowels = "əäaāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Pattern for one vowel character optionally followed by "ː"; used by syllabify
-- to separate adjacent vowels.
local vowel = "[əäaāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?"
-- An "h" preceded by one of these characters (captured) is rewritten to "ʱ"
-- in export.toIPA.
local weak_h = "([gjdḍbṛnməäaāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː])h"
-- export.toIPA appends "h" to this pattern; the captured consonant followed by
-- "h" is rewritten to the consonant plus "ʰ".
local aspirate = "([kctṭp])"
-- Three captures: a vowel character (optionally followed by a combining tilde),
-- a run of one or more characters that are neither in `vowels` nor ".", and
-- another vowel character (optionally followed by a combining tilde).
local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.]+)([" .. vowels .. "]̃?)"
--- Split a string of transliterated consonants into a list of units.
-- A unit is either a single character from the consonant class below or one of
-- the listed consonants followed by "h". Characters are accumulated greedily:
-- the pending unit is emitted as soon as the next character cannot extend it.
-- A space is appended to the input so that the last pending unit is emitted too.
-- If the very first character is not in the consonant class, the initial empty
-- string is emitted as the first element.
-- @param text string of consonants (as captured by syllabify_pattern)
-- @return array of strings
local function find_consonants(text)
	local single = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉ]$"
	local with_h = "^[kgcjṭḍṇtdpbṛ]h$"
	local units = {}
	local pending = ""
	for codepoint in mw.ustring.gcodepoint(text .. " ") do
		local char = mw.ustring.char(codepoint)
		local candidate = pending .. char
		if find(candidate, single) or find(candidate, with_h) then
			-- The new character still forms a valid unit with what we have.
			pending = candidate
		else
			units[#units + 1] = pending
			pending = char
		end
	end
	return units
end
--- Insert syllable boundaries (".") into a transliterated string.
-- Step 1: for every vowel + consonant run + vowel match of syllabify_pattern,
-- the consonant run is split with find_consonants and a "." is inserted after
-- the first consonant when there is more than one, or before the only
-- consonant otherwise.
-- Step 2: a "." is inserted between two directly adjacent vowels.
-- Both steps run twice because a match consumes its trailing vowel, so
-- overlapping sequences are only reached on the second pass.
-- @param text transliterated string
-- @return the string with "." inserted at the boundaries found
local function syllabify(text)
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(a, b, c)
			-- `local` keeps the consonant list from leaking into the global table.
			local b_set = find_consonants(b)
			table.insert(b_set, #b_set > 1 and 2 or 1, ".")
			return a .. table.concat(b_set) .. c
		end)
		-- A former substitution here used "(?=...)". Lua/ustring patterns have no
		-- lookahead, so it could only match a literal "?=" and never fired; the
		-- vowel-hiatus split it was meant to perform is done by the loop below.
	end
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	return text
end
--- Mark the stressed syllable in a syllabified IPA string.
-- The input is split into words on "‿" and each word into syllables on ".".
-- Every syllable is classified as
--   SH - first and last character are both outside `vowels`,
--   H  - otherwise, if it contains "ː",
--   L  - otherwise.
-- For each class the positions of the last and the second-to-last syllable of
-- that class are remembered. The stress mark "ˈ" is placed before the first
-- available of: second-to-last SH, last SH, second-to-last H, last H,
-- second-to-last L, last L.
-- @param ipa syllabified IPA string ("." between syllables, "‿" between words)
-- @return the string with "ˈ" inserted and the class tag appended to every
--         syllable
function export.stress(ipa)
	local non_vowel = '[^' .. vowels .. ']'
	local result = {}
	for word in gmatch(ipa, '([^‿]+)') do
		local syllables = {}
		local light, light2 = nil, nil
		local heavy, heavy2 = nil, nil
		local superheavy, superheavy2 = nil, nil
		local i = 1
		for syllable in gmatch(word, '([^%.]+)') do
			local t = 'L'
			-- `find` returns nil when nothing matches. The previous code called
			-- `gmatch` here, which returns an iterator function and is therefore
			-- always truthy, so every syllable was classified as SH.
			if find(sub(syllable, 1, 1), non_vowel) and find(sub(syllable, -1, -1), non_vowel) then
				superheavy2, superheavy = superheavy, i
				t = 'SH'
			elseif find(syllable, 'ː') then
				heavy2, heavy = heavy, i
				t = 'H'
			else
				light2, light = light, i
			end
			-- NOTE(review): the class tag is appended to the syllable text and so
			-- ends up in the returned string; this looks like debugging output -
			-- confirm before using the result for display.
			table.insert(syllables, syllable .. t)
			i = i + 1
		end
		-- Insert the stress mark as its own list element in front of the chosen
		-- syllable; the "." that concat puts around it is stripped below.
		if superheavy2 then table.insert(syllables, superheavy2, 'ˈ')
		elseif superheavy then table.insert(syllables, superheavy, 'ˈ')
		elseif heavy2 then table.insert(syllables, heavy2, 'ˈ')
		elseif heavy then table.insert(syllables, heavy, 'ˈ')
		elseif light2 then table.insert(syllables, light2, 'ˈ')
		elseif light then table.insert(syllables, light, 'ˈ') end
		table.insert(result, table.concat(syllables, '.'))
	end
	local final = table.concat(result, '‿')
	final = gsub(final, '%.ˈ', 'ˈ')
	final = gsub(final, 'ˈ%.', 'ˈ')
	return final
end
-- Characters whose IPA symbol equals their transliteration: register each one
-- in `correspondences` as mapping to itself (replacing any earlier entry).
local identical = "knlsfzθ"
for position = 1, mw.ustring.len(identical) do
	local symbol = sub(identical, position, position)
	correspondences[symbol] = symbol
end
-- Transliterate `text` with the module-level Hindi language object (`lang`).
-- Whatever lang:transliterate returns is passed straight back to the caller;
-- export.toIPA treats a falsy result as a failure.
local function transliterate(text)
return lang:transliterate(text)
end
--- Build a link to `term` via Module:links.
-- @param term the term to link
-- @return the result of full_link called with the term and the module-level
--         language and script objects
function export.link(term)
	local m_links = require("Module:links")
	local data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(data)
end
--- Derive a narrow transcription from a broad, syllabified IPA string.
-- Applies a fixed sequence of substitutions; the order matters because later
-- rules see the output of earlier ones.
-- The consonant classes list both ASCII "g" and IPA "ɡ" (U+0261): the broad
-- transcription built from `correspondences` uses "ɡ", which the classes
-- previously did not contain, so the voiced velar stop was never affected.
-- @param ipa broad IPA string with "." between syllables
-- @return the narrow IPA string
function export.narrow_IPA(ipa)
	-- what /ɑ/ really is
	ipa = gsub(ipa, 'ɑ', 'ä')
	-- dentals
	ipa = gsub(ipa, '([snl])', '%1̪')
	-- nasals induce nasalization
	ipa = gsub(ipa, '([əäɪiʊueɛoɔæ])(ː?)([nɳŋm])', '%1̃%2%3')
	-- cc, jj
	ipa = gsub(ipa, 't͡ʃ(%.?)t͡ʃ', 't̚%1t͡ʃ')
	ipa = gsub(ipa, 'd͡ʒ(%.?)d͡ʒ', 'd̚%1d͡ʒ')
	-- syllable boundary consonants
	-- Run twice: a match consumes the character after the ".", so a stop that
	-- directly follows another match is only reached on the second pass.
	for _ = 1, 2 do
		ipa = gsub(ipa, '([kgɡʈɖtdpb]̪?)%.([^jʋ])', '%1̚.%2')
	end
	-- aspiration rules
	ipa = gsub(ipa, 'əʱ%.([kgɡŋtdɲʈɖɳnpbmɾlzqfʂʃsʒɭɣɹʋ])', 'ɛʱ.%1')
	ipa = gsub(ipa, 'ʊʱ%.([kgɡŋtdɲʈɖɳnpbmɾlzqfʂʃsʒɭɣɹʋ])', 'ɔʱ.%1')
	ipa = gsub(ipa, 'ə%.ɦə', 'ɛ.ɦɛ')
	ipa = gsub(ipa, 'ʊ%.ɦə', 'ɔ.ɦɔ')
	ipa = gsub(ipa, 'ə%.ɦʊ', 'ɔ.ɦɔ')
	-- retroflex s rules
	ipa = gsub(ipa, 'ʂ(%.?[^ʈɖ])', 'ʃ%1')
	ipa = gsub(ipa, 'ʂ$', 'ʃ')
	return ipa
end
--- Convert a term to a broad, syllabified IPA string.
-- The term is transliterated with the module-level language object, the
-- transliteration is normalised and syllabified, and finally every character
-- is mapped through `correspondences`.
-- @param text  the term; "॰" and "-" are turned into "." before transliteration
-- @param style when equal to "standard", the characters x ġ q ž z f ' are
--              first replaced through `perso_arabic`; any other value leaves
--              them untouched
-- @return IPA string with "." between syllables and "‿" where the
--         transliteration had a space
-- Raises an error when the transliteration yields a falsy value.
function export.toIPA(text, style)
	-- Keep the term as passed in so the error message can quote it.
	local original = text
	text = gsub(text, '॰', '.')
	text = gsub(text, '%-', '.')
	local translit = transliterate(text)
	if not translit then
		-- This used to concatenate the undefined global `Hindi` (nil), which
		-- raised "attempt to concatenate" instead of the message below.
		error('The term "' .. original .. '" could not be transliterated.')
	end
	if style == "standard" then
		translit = gsub(translit, "[xġqžzf']", perso_arabic)
	end
	-- force final schwa
	translit = gsub(translit, "a~$", "ə")
	-- vowels
	translit = gsub(translit, "͠", "̃")
	translit = gsub(translit, "a([ui])([yw])", function(a, b)
		return "a" .. lengthen[a] .. b
	end)
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ", "ri")
	translit = gsub(translit, ",", "")
	-- A space becomes a double boundary, which is turned into "‿" further down.
	translit = gsub(translit, " ", "..")
	translit = syllabify(translit)
	-- Move a boundary that ended up in front of a length mark or a combining
	-- tilde back behind the vowel it belongs to.
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")
	-- gy
	translit = gsub(translit, 'jñ', 'gy')
	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')
	-- Character-by-character lookup; characters without an entry stay as they are.
	local result = gsub(translit, ".", correspondences)
	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	result = gsub(result, "(...)ə$", "%1ᵊ")
	result = gsub(result, "(...)ə ", "%1ᵊ ")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")
	result = gsub(result, "%.?%-", ".")
	result = gsub(result, "%.%.", "‿")
	-- formatting
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")
	-- i and u lengthening
	result = gsub(result, "ʊ(̃?)(ʱ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ʱ?)$", "i%1ː%2")
	result = gsub(result, "ɪ%.j", "iː.j")
	return result
end
-- Append the stressed broad transcription of `broad` (in slashes) to `results`,
-- followed by the stressed narrow transcription (in brackets) when
-- export.narrow_IPA changes anything.
local function add_broad_and_narrow(results, broad)
	table.insert(results, { pron = "/" .. export.stress(broad) .. "/" })
	local narrow = export.narrow_IPA(broad)
	if narrow ~= broad then
		table.insert(results, { pron = "[" .. export.stress(narrow) .. "]" })
	end
end

--- Template entry point for Hindi.
-- Reads the positional arguments of the parent frame; empty strings are
-- skipped. When no first argument is given, the current page title is used as
-- the only term. For every term the "standard" transcription is emitted
-- (broad, then narrow if different); the "persianized" one is emitted the same
-- way when it differs from the standard one.
-- @param frame the frame object the module was invoked with
-- @return the result of Module:IPA's format_IPA_full for the collected items
function export.make(frame)
	local args = frame:getParent().args
	local p, results = {}, {}
	if args[1] then
		for _, item in ipairs(args) do
			-- Skip empty arguments explicitly instead of inserting nil.
			if item ~= "" then
				table.insert(p, item)
			end
		end
	else
		p = { mw.title.getCurrentTitle().text }
	end
	for _, Hindi in ipairs(p) do
		local standard = export.toIPA(Hindi, "standard")
		local persianized = export.toIPA(Hindi, "persianized")
		add_broad_and_narrow(results, standard)
		if standard ~= persianized then
			add_broad_and_narrow(results, persianized)
		end
	end
	return m_IPA.format_IPA_full { lang = lang, items = results }
end
--- Template entry point for Urdu.
-- Reads the positional arguments of the parent frame (transliterations) and
-- emits the "persianized" broad transcription of each non-empty one, formatted
-- with the Urdu language object. Unlike export.make, no stress marking and no
-- narrow transcription are produced.
-- @param frame the frame object the module was invoked with
-- @return the result of Module:IPA's format_IPA_full for the collected items
-- Raises an error when no first argument is given.
function export.make_ur(frame)
	local args = frame:getParent().args
	if not args[1] then
		error("No transliterations given.")
	end
	-- Named differently from the module-level `lang` (Hindi) to avoid shadowing it.
	local ur_lang = require("Module:languages").getByCode("ur")
	local results = {}
	for _, Urdu in ipairs(args) do
		-- Empty arguments are skipped.
		if Urdu ~= "" then
			table.insert(results, { pron = "/" .. export.toIPA(Urdu, "persianized") .. "/" })
		end
	end
	return m_IPA.format_IPA_full { lang = ur_lang, items = results }
end
return export