Module:Deva-Beng-translit

This module will transliterate text in the Devanagari script into the Bengali script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:Deva-Beng-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

-- Devanagari sequences of a vowel letter plus the candra-long-e sign that are
-- rewritten as a unit; export.tr applies these before its per-character pass.
local twoChars = {
	["अॕ"] = "অ্যা",
	["एॕ"] = "এ্যা",
}

-- Per-character map from Devanagari to Bengali. export.tr passes this table as
-- the replacement argument of a gsub over every UTF-8 character, so characters
-- without an entry here are left unchanged by that pass.
-- Note that both "ब" and "व" map to "ব", and that "य" maps to "য়" here; the
-- text-initial and post-virama forms of "य" are handled separately in export.tr
-- before this table is applied.
local oneChar = {
	-- Consonants.
	["क"] = "ক", ["ख"] = "খ", ["ग"] = "গ", ["घ"] = "ঘ", ["ङ"] = "ঙ", ["च"] = "চ", ["छ"] = "ছ", ["ज"] = "জ", ["झ"] = "ঝ", ["ञ"] = "ঞ", ["ट"] = "ট", ["ठ"] = "ঠ", ["ड"] = "ড", ["ढ"] = "ঢ", ["ण"] = "ণ", ["त"] = "ত", ["थ"] = "থ", ["द"] = "দ", ["ध"] = "ধ", ["न"] = "ন", ["प"] = "প", ["फ"] = "ফ", ["ब"] = "ব", ["भ"] = "ভ", ["म"] = "ম", ["य"] = "য়", ["र"] = "র", ["ल"] = "ল", ["ळ"] = "ল়", ["व"] = "ব", ["श"] = "শ", ["ष"] = "ষ", ["स"] = "স", ["ह"] = "হ",
	-- Independent vowel letters.
	["अ"] = "অ", ["आ"] = "আ", ["ॲ"] = "অ্য", ["इ"] = "ই", ["ई"] = "ঈ", ["उ"] = "উ", ["ऊ"] = "ঊ", ["ऋ"] = "ঋ", ["ॠ"] = "ৠ", ["ऌ"] = "ঌ", ["ॡ"] = "ৡ", ["ऍ"] = "এ্য", ["ए"] = "এ", ["ऐ"] = "ঐ", ["ओ"] = "ও", ["औ"] = "ঔ",
	-- Dependent vowel signs and the virama.
	["ा"] = "া", ["ि"] = "ি", ["ी"] = "ী", ["ु"] = "ু", ["ू"] = "ূ", ["ृ"] = "ৃ", ["ॄ"] = "ৄ", ["ॢ"] = "ৢ", ["ॣ"] = "ৣ", ["ॅ"] = "্য", ["ॕ"] = "্যা", ["े"] = "ে", ["ै"] = "ৈ", ["ो"] = "ো", ["ौ"] = "ৌ", ["्"] = "্",
	-- Other signs: anusvara, visarga, candrabindu, nukta, avagraha, abbreviation sign, and others.
	["ं"] = "ং", ["ः"] = "ঃ", ["ँ"] = "ঁ", ["़"] = "়", ["ऽ"] = "ঽ", ["॰"] = "৽", ["꣼"] = "ঀ", ["ॐ"] = "ওঁ",
	-- Digits 0-9.
	["०"] = "০", ["१"] = "১", ["२"] = "২", ["३"] = "৩", ["४"] = "৪", ["५"] = "৫", ["६"] = "৬", ["७"] = "৭", ["८"] = "৮", ["९"] = "৯"
}

-- Transliterates Devanagari text into the Bengali script.
--
-- Parameters:
--   text        the string to transliterate.
--   lang        language code; "sa" (Sanskrit) disables the khanda-ta substitution.
--   sc          script code; accepted as part of the signature but not read here.
--   override    if truthy, the text is returned even if some characters could
--               not be transliterated (the validation step is skipped).
--   noKhandaTa  if truthy, "ৎ" will not be contextually substituted for "ত্",
--               which is suitable (e.g.) for Sanskrit transliteration.
--
-- Returns the transliterated string in NFC, or nil when, without override, the
-- result fails validation (see the end of the function).
function export.tr(text, lang, sc, override, noKhandaTa)
	-- Byte-level pattern matching one UTF-8 encoded character, so the plain
	-- string library can drive the per-character table lookup below.
	local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"
	-- Work on the decomposed form so combining signs are separate characters
	-- that can be looked up individually in oneChar.
	text = mw.ustring.toNFD(text)

	-- Multi-character sequences must be replaced before the per-character pass.
	for digraph, replacement in pairs(twoChars) do
		text = string.gsub(text, digraph, replacement)
	end

	-- "य" becomes plain "য" at the very start of the text and after a virama;
	-- everywhere else the oneChar table maps it to "য়".
	-- NOTE(review): "^" anchors to the start of the whole text only, so a "य"
	-- beginning a later word is mapped to "য়" - confirm whether that is intended.
	text = string.gsub(text, "^य", "য")
	text = string.gsub(text, "्य", "্য")

	-- Characters without an entry in oneChar are left as they are.
	text = string.gsub(text, UTF8_char, oneChar)

	-- Khanda Ta is not used in Sanskrit.
	if not noKhandaTa and lang ~= "sa" then
		-- "ত্" becomes "ৎ" unless the following letter forms a conjunct with it.
		-- "য" is in the excluded set because "ত্য" (ta + ya-phala) is an ordinary
		-- conjunct, and the steps above emit "্য" from "्य", "ॅ" and "ॕ"; without
		-- it, e.g. "सत्य" would come out as "সৎয" instead of "সত্য".
		text = mw.ustring.gsub(text, "ত্([^তথনবমযরল])", "ৎ%1")
		text = string.gsub(text, "ত্$", "ৎ")
	end

	text = mw.ustring.toNFC(text)

	-- With override the result is returned regardless of validation, so there
	-- is no need to load the scripts module or scan the text.
	if override then
		return text
	end

	-- Validation: strip angle-bracketed spans, whitespace and punctuation, then
	-- require every remaining character to be counted by the "Beng" script
	-- object (presumably: to belong to the Bengali script - see Module:scripts),
	-- and reject any text containing two consecutive nuktas.
	local Beng = require("Module:scripts").getByCode("Beng")
	local reducedText = mw.ustring.gsub(mw.ustring.gsub(text, "<.->", ""), "[%s%p\n]+", "")
	if mw.ustring.len(reducedText) == Beng:countCharacters(reducedText) and not mw.ustring.find(text, "়়") then
		return text
	end
	return nil
end

return export