Bengali IPA pronunciation module. See {{bn-IPA}}.

Testcases edit

Module:bn-IPA/sandbox/testcases:

22 of 35 tests failed. (refresh)

Text | Expected | Actual | Comments
test_all:
Failed: অল্পসংখ্যক (olpośoṅkhok) | ɔlpoʃɔŋkʰɔk | ɔlpɔʃɔŋkʰɔk
Failed: উত্তর (uttor) | ut̪ːoɹ | ut̪ːɔɾ
Failed: অ্যান্টার্কটিকা (ênṭarkṭika) | ɛntaɹktika | ænʈaɾkʈika
Passed: দেশ (deś) | d̪eʃ | d̪eʃ
Failed: আমার (amar) | amaɹ | amaɾ
Failed: খেলনা (khelna) | kʰɛlna | kʰelna
Passed: বিবাহ (bibaho) | bibaɦo | bibaɦo
Passed: গম (gom) | ɡɔm | ɡɔm
Failed: খরগোশ (khorgōś) | kʰɔɹɡoʃ | kʰɔɾɡoʃ
Passed: সংস্থান (śoṅsthan) | ʃɔŋst̪ʰan | ʃɔŋst̪ʰan
Passed: মধু (modhu) | mod̪ʱu | mod̪ʱu
Passed: মিঞা (mĩa) | mĩa | mĩa
Failed: শাস্ত্র (śastro) | ʃast̪ɹo | ʃast̪ɾo
Failed: সমাচার (śomacar) | ʃɔmatɕaɾ | ʃɔmatʃaɾ
Failed: শ্রাবণ (srabon) | sɹabon | sɾabɔn
Passed: ভাই (bhai) | bʱai̯ | bʱai̯
Failed: দৃষ্টি (driśṭi) | d̪ɹiʃti | d̪ɾiʃʈi
Passed: শক্তি (śokti) | ʃokt̪i | ʃokt̪i
Passed: ওস্তাদ (ōstad) | ost̪ad̪ | ost̪ad̪
Failed: পঙ্কজ (poṅkoj) | pɔŋkodʑ | pɔŋkɔdʒ
Passed: মালা (mala) | mala | mala
Passed: দেওয়াল (deōẇal) | d̪eo̯al | d̪eo̯al
Failed: নিরস্ত্র (nirostro) | niɹɔst̪ɹo | niɾɔst̪ɾo
Failed: নিবৃত্ত (nibritto) | niɹbɹit̪ːo | nibɾit̪ːo
Failed: মৃত্যুঞ্জয় (mrittunjoẏ) | mɹit̪ːundʑɔe̯ | mɾit̪ːundʒɔe̯
Failed: গর্ভপাত (gorbhopat) | ɡɔɹbʱopat | ɡɔɾbʱɔpat̪
Failed: গর্ভ (gorbho) | ɡɔɹbʱo | ɡɔɾbʱo
Failed: বস্ত্র (bostro) | bɔst̪ɹo | bɔst̪ɾo
Failed: যক্ষ্মা (jokkha) | dʑɔkːʰa | dʒokʰːa
Passed: সংখ্যা (śoṅkha) | ʃɔŋkʰa | ʃɔŋkʰa
Failed: সম্ভ্রান্ত (śombhranto) | ʃɔmbʱɹant̪o | ʃɔmbʱɾant̪o
Failed: ইনফ্লুয়েঞ্জা (inphluẏenja) | influendʑa | inpʰlu̯endʒa
Failed: পশ্চিমবঙ্গ (pościmboṅgo) | poʃtɕimbɔŋɡo | pɔʃtʃimbɔŋɡo
Passed: নয়ন (noẏon) | nɔe̯on | nɔe̯on
Failed: জিহ্বা (jiubha) | dʑiubʱa | dʒiu̯bʱa

local export = {}

local lang = require("Module:languages").getByCode("bn")
local sc = require("Module:scripts").getByCode("Beng")
local m_IPA = require("Module:IPA")
local m_a = require("Module:accent qualifier")

local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find

-- Lookup table from one transliteration character to its IPA rendering.
-- export.toIPA passes it as the replacement argument of a gsub over the
-- pattern ".", so every lookup is made with a single character; a character
-- that has no entry here is left as it is in the output.
-- NOTE(review): some nasal-vowel keys below look like a base letter followed
-- by a combining tilde, i.e. more than one code point. If so, a "." match can
-- never look them up as a unit — confirm whether they are still needed.
local correspondences = {
	-- consonants
	["ṅ"] = "ŋ", ["g"] = "ɡ",
	["c"] = "tʃ", ["j"] = "dʒ",
	["ṭ"] = "ʈ", ["ḍ"] = "ɖ",
	["t"] = "t̪", ["d"] = "d̪",
	["r"] = "ɾ", ["l"] = "l",
	["ś"] = "ʃ", ["h"] = "ɦ",
	["ṛ"] = "ɽ",

	-- Bengali letters mapped directly to aspirated stops
	["ফ"] = "pʰ",	["ভ"] = "bʱ",

	-- glides (the "ẏ" entry used to be listed twice with the same value;
	-- it is now defined once, here)
	["ẏ"] = "e̯", ["ẇ"] = "o̯",

	-- oral vowels
	["o"] = "ɔ", ["ô"] = "ɔ",
	["ī"] = "i", ["ō"] = "o",
	["ū"] = "u", ["ê"] = "æ",

	-- nasal vowels
	["õ"] = "ɔ̃", ["ō̃"] = "õ", ["ī̃"] = "ĩ", ["ū̃"] = "ũ", ["ễ"] = "æ̃",
}

-- Pattern fragments shared by the conversion functions below.

-- Bare list of vowel characters (no brackets). Not referenced anywhere in the
-- visible part of this module.
local vowels = "aāiīuūoêɔɔ̃ɛeææ̃ãā̃ễẽĩī̃õũū̃"
-- The same characters as a pattern character class; it matches ONE character
-- from the list, so a combining mark listed here is matched on its own.
local vowel = "[aāiīuūoêɔɔ̃ɛeææ̃ãā̃ễẽĩī̃õũū̃]"
-- Character class of consonant symbols. It mixes transliteration letters with
-- a few IPA symbols and is used by both export.toIPA and export.narrow_IPA.
local consonants = "[bcdḍgjklmnṇprṛsṣśtṭzd̪ɖɾt̪ʈ]"
-- Capturing class: in export.toIPA one of these letters followed by "h" is
-- rewritten as the letter plus "ʱ".
local weak = "([gjdḍbṛ])"
-- Capturing class: in export.toIPA one of these letters followed by "h" is
-- rewritten as the letter plus "ʰ".
local aspirate = "([kctṭp])"

-- Splits `text` into a list of pieces, reading it one code point at a time.
-- A piece keeps growing while it is still either a single consonant letter
-- or one of the listed consonants followed by "h"; as soon as the next
-- character would break that shape, the piece collected so far is stored
-- (whatever it contains, including an empty string or a non-consonant) and
-- a new piece starts with that character. A space is appended to the input
-- so that the last real piece is stored as well.
-- Not called anywhere in the visible part of this module.
local function find_consonants(text)
	local single_letter = "^[kgṅcjñṭḍṇṁtdnpbmyrlvśṣshṛz]$"
	local letter_plus_h = "^[kgcjṭḍṇtdpbṛ]h$"

	local pieces = {}
	local pending = ""
	for codepoint in mw.ustring.gcodepoint(text .. " ") do
		local char = mw.ustring.char(codepoint)
		local candidate = pending .. char
		if not (find(candidate, single_letter) or find(candidate, letter_plus_h)) then
			pieces[#pieces + 1] = pending
			pending = char
		else
			pending = candidate
		end
	end
	return pieces
end

-- Letters that are spelled the same in the transliteration and in the IPA
-- output: each one is registered in `correspondences` as mapping to itself.
local identical = "knlsfz"
for position = 1, #identical do
	local letter = identical:sub(position, position)
	correspondences[letter] = letter
end

-- Transliterates `text` through the language object and hands back only the
-- first value that lang:transliterate returns; any further return values are
-- discarded.
local function transliterate(text)
	local first_result = lang:transliterate(text)
	return first_result
end

-- Passes `term`, together with this module's language and script objects, to
-- full_link from Module:links and returns whatever that call returns.
function export.link(term)
	local m_links = require("Module:links")
	local link_data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(link_data)
end

--- Converts a Bengali term to a phonemic IPA string.
--
-- The text is transliterated when its detected script reports
-- isTransliterated(); otherwise it is used as given. A sequence of ordered
-- string rewrites is then applied to the transliteration, each character is
-- mapped through `correspondences`, and a second sequence of rewrites
-- (gemination, glides, formatting) is applied to the IPA string.
-- The order of the rewrites matters: later rules see the output of earlier
-- ones.
--
-- @param text  the term to convert
-- @param style one of the style names handled below ("desanskritized",
--              "colloquial", "vanga"); any other value, such as the
--              "formal" passed by export.make, applies no style-specific
--              rules
-- @return the IPA string, without surrounding slashes or brackets
-- Raises an error if the transliteration step yields nil or false.
function export.toIPA(text, style)
	text = gsub(text, '॰', '-')
	local translit = text
	if lang:findBestScript(text):isTransliterated() then
		translit = transliterate(text)
	end
	if not translit then
		error('The term "' .. text .. '" could not be transliterated.')
	end

	if style == "desanskritized" then
		translit = gsub(translit, "ṛh", "ṛ")
	end

	if style == "colloquial" then
		translit = gsub(translit, "lh", "ll") -- Chatterji
		translit = gsub(translit, "mh", "mm") -- Chatterji
		translit = gsub(translit, "nh", "nn") -- Chatterji
		-- The longer sequence has to be rewritten first: once every "ṛ" has
		-- become "r" there is no "ṛh" left to match.
		translit = gsub(translit, "ṛh", "r")
		translit = gsub(translit, "ṛ", "r")
		translit = gsub(translit, "s", "ś")
		translit = gsub(translit, "ś([t̪d̪])", "s%1")
		translit = gsub(translit, "z", "j")
	end

	if style == "vanga" then
		-- NOTE(review): the "bʱ" and "pʰ" rules below look for the modifier
		-- letters ʱ/ʰ, which this function only introduces further down
		-- (aspiration step and `correspondences`), so they cannot match
		-- anything produced by the rules above. They are kept unchanged
		-- because moving them would interact with vowel harmony and
		-- gemination — confirm the intended placement.
		translit = gsub(translit, "bʱ", "v")
		-- "ch" before "c" and "ṛh" before "ṛ": the shorter pattern would
		-- otherwise consume the text the longer one is meant to match.
		-- NOTE(review): the "s" -> "ś" rule below also rewrites the "s"
		-- produced by these two rules — confirm that this is intended.
		translit = gsub(translit, "ch", "s")
		translit = gsub(translit, "c", "ts")
		translit = gsub(translit, "j(h?)", "z")
		translit = gsub(translit, "pʰ", "f")
		translit = gsub(translit, "ṛh", "r")
		translit = gsub(translit, "ṛ", "r")
		translit = gsub(translit, "s", "ś")
		translit = gsub(translit, "ś([t̪d̪])", "s%1")
	end

	translit = gsub(translit, "ś([lr])", "s%1")
	translit = gsub(translit, "^śp", "sp")
	-- "$" is an anchor only in the pattern; in a replacement string it is an
	-- ordinary character, so it must not appear there.
	translit = gsub(translit, "śk$", "sk")
	translit = gsub(translit, "śk(" .. consonants .. ")", "sk%1")

	-- vowels
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	translit = gsub(translit, ",", "")
	-- From here on a word boundary is written as ".." (turned into "‿" after
	-- the character mapping).
	translit = gsub(translit, " ", "..")
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")

	translit = gsub(translit, "aẏ([eioōu])", "a%1")
	translit = gsub(translit, "eẏ([aioōu])", "e%1")
	translit = gsub(translit, "êẏ([aeioōu])", "ê%1")
	translit = gsub(translit, "iẏ([aeoōu])", "i%1")
	translit = gsub(translit, "ĩẏ([aeoōu])", "ĩ%1")
	translit = gsub(translit, "ito$", "itō")
	translit = gsub(translit, "oẏ([aeiōu])", "o%1")
	translit = gsub(translit, "õẏ([aeiōu])", "õ%1")
	translit = gsub(translit, "ōẇ([aeoōu])", "ō%1")
	translit = gsub(translit, "ō̃ẇ([aeoōu])", "ō̃%1")
	translit = gsub(translit, "uẏ([aeioō])", "u%1")
	translit = gsub(translit, "ũẏ([aeioō])", "ũ%1")

	translit = gsub(translit, "o(" .. consonants .. "h?)([iu])", "ō%1%2") -- vowel harmony, per Chatterji

	translit = gsub(translit, "ho$", "hō")
	translit = gsub(translit, "(" .. vowel .. ")h$", "%1")

	-- Word-final "o" becomes "ō": at the very end of the text and before a
	-- word boundary. Spaces were already replaced by ".." above, so the
	-- boundary has to be matched in that form.
	translit = gsub(translit, "o$", "ō")
	translit = gsub(translit, "o%.%.", "ō..")

	translit = gsub(translit, "ok(" .. consonants .. ")", "ōk%1")

	translit = gsub(translit, "([lmn])ho", "%1hō")

	-- mark aspiration: "h" after a voiceless stop -> ʰ, after a voiced one -> ʱ
	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak .. "h", '%1ʱ')

	-- Map every remaining character through the table; characters without an
	-- entry are kept unchanged.
	local result = gsub(translit, ".", correspondences)

	result = gsub(result, "%.?%-", ".")

	result = gsub(result, "%.%.", "‿")

	-- formatting
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")

	result = gsub(result, "^ɾ", "r")

	-- force final ɔe̯
	result = gsub(result, "([ʒm])oe̯$", "%1ɔe̯")

	-- gemination: a doubled consonant is written once, followed by ː
	result = gsub(result, "kk(ʰ?)", "k%1ː")
	result = gsub(result, "ɡɡ(ʱ?)", "ɡ%1ː")
	result = gsub(result, "tʃtʃ(ʰ?)", "tʃ%1ː")
	result = gsub(result, "dʒdʒ(ʱ?)", "dʒ%1ː")
	result = gsub(result, "ʈʈ(ʰ?)", "ʈ%1ː")
	result = gsub(result, "ɖɖ(ʱ?)", "ɖ%1ː")
	result = gsub(result, "t̪t̪(ʰ?)", "t̪%1ː")
	result = gsub(result, "d̪d̪(ʱ?)", "d̪%1ː")
	result = gsub(result, "pp", "pː")
	result = gsub(result, "ff", "fː")
	result = gsub(result, "bb(ʱ?)", "b%1ː")
	result = gsub(result, "ɾɾ", "ɾ") -- মহররম
	result = gsub(result, "ll", "lː")
	result = gsub(result, "mm", "mː")
	result = gsub(result, "nn", "nː")
	result = gsub(result, "ʃʃ", "ʃː")
	result = gsub(result, "ss", "sː")

	-- no length mark on a stop or affricate that follows a nasal
	result = gsub(result, "ŋk(ʰ?)ː", "ŋk%1")
	result = gsub(result, "ŋɡ(ʱ?)ː", "ŋɡ%1")
	result = gsub(result, "ntʃ(ʰ?)ː", "ntʃ%1")
	result = gsub(result, "ndʒ(ʱ?)ː", "ndʒ%1")
	result = gsub(result, "nʈ(ʰ?)ː", "nʈ%1")
	result = gsub(result, "nɖ(ʱ?)ː", "nɖ%1")
	result = gsub(result, "nt̪(ʰ?)ː", "nt̪%1")
	result = gsub(result, "nd̪(ʱ?)ː", "nd̪%1")

	-- vowel sequences: mark the non-syllabic member
	result = gsub(result, "ae", "ae̯")
	result = gsub(result, "iu", "iu̯")
	result = gsub(result, "i(" .. vowel .. ")", "i̯%1")
	result = gsub(result, "i̯u̯", "iu̯")
	result = gsub(result, "oa", "o̯a")
	result = gsub(result, "ɔe̯ɔ", "ɔe̯o")
	result = gsub(result, "ɔo", "ɔo̯")
	result = gsub(result, "u(" .. vowel .. ")", "u̯%1")

	result = gsub(result, "([aeou])i", "%1i̯")

	result = gsub(result, "^ui̯", "u̯i")

	-- collapse a doubled non-syllabic mark left by the rules above
	result = gsub(result, "([eiou])̯̯", "%1̯")

	return result
end

--- Derives a narrow (phonetic) transcription from a phonemic IPA string.
--
-- Applies an ordered list of rewrites to `ipa`; "‿" is treated as the word
-- boundary, as written by export.toIPA. The result equals the input when no
-- rule matches.
--
-- @param ipa phonemic IPA string
-- @return the rewritten string
function export.narrow_IPA(ipa)
	-- lenition before dental
	ipa = gsub(ipa, "dʒ([d̪t̪])", "z%1")
	ipa = gsub(ipa, "dʒʱ([d̪t̪])", "z%1")

	-- word-final deaspiration
	ipa = gsub(ipa, "ɖʱ$", "ɖ")
	ipa = gsub(ipa, "dʒʱ$", "dʒ")
	ipa = gsub(ipa, "d̪ʱ$", "d̪")
	ipa = gsub(ipa, "ɡʱ$", "ɡ")
	ipa = gsub(ipa, "pʰ$", "p")
	ipa = gsub(ipa, "ʈʰ$", "ʈ")
	ipa = gsub(ipa, "tʃʰ$", "tʃ")
	ipa = gsub(ipa, "t̪ʰ$", "t̪")

	-- exceptions
	ipa = gsub(ipa, "bʱ$", "v")
	ipa = gsub(ipa, "kʰ$", "x")

	-- dental and post-alveolar lateral
	ipa = gsub(ipa, "l([t̪d̪])", "l̪%1")
	ipa = gsub(ipa, "l([ʈɖ])", "ɭ%1")

	-- dental and post-alveolar nasal
	ipa = gsub(ipa, "n([t̪d̪])", "n̪%1")
	ipa = gsub(ipa, "n([ʈɖ])", "ɳ%1")

	-- regressive assimilation across a word boundary.
	-- The aspiration mark is optional ("ʱ?" / "ʰ?"). The earlier form
	-- "([ʱ?])" was a character class that required exactly one character,
	-- either the mark or a literal "?", so unaspirated stops never matched.
	ipa = gsub(ipa, "bʱ?‿p", "pː")
	ipa = gsub(ipa, "dʱ?‿t", "tː")
	ipa = gsub(ipa, "dʒʱ?‿tʃ", "tʃː")
	ipa = gsub(ipa, "d̪ʱ?‿t̪", "t̪ː")
	ipa = gsub(ipa, "f‿b", "bː")
	ipa = gsub(ipa, "ɡʱ?‿k", "kː")
	ipa = gsub(ipa, "kʰ?‿ɡ", "ɡː")
	ipa = gsub(ipa, "pʰ?‿b", "bː")
	ipa = gsub(ipa, "tʰ?‿d", "dː")
	ipa = gsub(ipa, "tʃʰ?‿dʒ", "dʒː")
	ipa = gsub(ipa, "t̪ʰ?‿d̪", "d̪ː")
	ipa = gsub(ipa, "z‿s", "sː")
	ipa = gsub(ipa, "zs", "sː")
	ipa = gsub(ipa, "sː([td])", "s%1")

	ipa = gsub(ipa, "^(" .. consonants .. ")ɾ(" .. vowel .. ")(" .. consonants .. ")ɾ", "%1ɾ%2%3ː") -- R syncope
	-- lengthen the listed consonants before l or ɾ
	ipa = gsub(ipa, "([dd̪mtt̪])([lɾ])", "%1ː%2")

	-- intervocalic e̯
	ipa = gsub(ipa, "(" .. vowel .. ")‿(" .. vowel .. ")", "%1e̯%2")

	-- long vowels: only when the whole string is VC or CVC
	ipa = gsub(ipa, "^(" .. vowel .. ")(" .. consonants .. ")$", "%1ː%2")
	ipa = gsub(ipa, "^(" .. consonants .. ")(" .. vowel .. ")(" .. consonants .. ")$", "%1%2ː%3")

	return ipa
end

-- Template entry point for the 'Rarh' pronunciation line.
-- Reads the positional arguments of the parent frame (empty ones are
-- skipped); when there is no first argument the current page title is used
-- instead. For every term the formal transcription is listed, followed by
-- the desanskritized and colloquial ones when they differ from the formal
-- one. Each transcription is given between slashes, followed by its narrow
-- form in square brackets if export.narrow_IPA changed it.
-- Returns the accent qualifier, a space, and the formatted IPA list.
function export.make(frame)
	local args = frame:getParent().args
	local pagetitle = mw.title.getCurrentTitle().text

	local terms, results = {}, {}

	if args[1] then
		for _, item in ipairs(args) do
			if item ~= "" then
				terms[#terms + 1] = item
			end
		end
	else
		terms = { pagetitle }
	end

	-- Appends the phonemic form and, when different, the narrow form.
	local function add(phonemic)
		table.insert(results, { pron = "/" .. phonemic .. "/" })
		local phonetic = export.narrow_IPA(phonemic)
		if phonetic ~= phonemic then
			table.insert(results, { pron = "[" .. phonetic .. "]" })
		end
	end

	for _, term in ipairs(terms) do
		local formal = export.toIPA(term, "formal")
		local desanskritized = export.toIPA(term, "desanskritized")
		local colloquial = export.toIPA(term, "colloquial")

		add(formal)
		if desanskritized ~= formal then
			add(desanskritized)
		end
		if colloquial ~= formal then
			add(colloquial)
		end
	end

	return m_a.show({'Rarh'}) .. ' ' .. m_IPA.format_IPA_full(lang, results)
end

-- Template entry point for the 'Vanga' pronunciation line.
-- Reads the positional arguments of the parent frame (empty ones are
-- skipped); when there is no first argument the current page title is used
-- instead. Every term is converted with the "vanga" style and listed between
-- slashes, followed by its narrow form in square brackets when
-- export.narrow_IPA changed it.
-- Returns the accent qualifier, a space, and the formatted IPA list.
function export.make_vanga(frame)
	local args = frame:getParent().args
	local pagetitle = mw.title.getCurrentTitle().text

	local p, results = {}, {}

	if args[1] then
		for _, item in ipairs(args) do
			if item ~= "" then
				table.insert(p, item)
			end
		end
	else
		p = { pagetitle }
	end

	for _, Vanga in ipairs(p) do
		local vanga = export.toIPA(Vanga, "vanga")
		table.insert(results, { pron = "/" .. vanga .. "/" })
		local narrow = export.narrow_IPA(vanga)
		-- Compare with the phonemic form computed above. The earlier code
		-- compared with `formal`, which is not defined in this function and
		-- therefore nil, so the narrow form was always added.
		if narrow ~= vanga then table.insert(results, { pron = "[" .. narrow .. "]" }) end
	end

	return m_a.show({'Vanga'}) .. ' ' ..  m_IPA.format_IPA_full(lang, results)
end

return export