Module:brx-translit

This module will transliterate Bodo (India) language text. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:brx-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup table from a single Devanagari character (or other symbol) to its
-- Latin transliteration. export.tr applies it with a table-driven gsub, so a
-- character that has no entry here is left unchanged in the output.
local conv = {
	-- consonants
	['ख'] = 'k', ['ग'] = 'g', ['ङ'] = 'ŋ', 
	['ज'] = 'z', ['थ'] = 't', ['द'] = 'd', ['न'] = 'n', ['फ'] = 'f', ['ब'] = 'b', ['म'] = 'm', 
	['य'] = 'y', ['र'] = 'r', ['ल'] = 'l', ['व'] = 'o', ['स'] = 's', ['ह'] = 'h',

	-- vowel diacritics (combining vowel signs written on a consonant), plus the ’ mark
	['ि'] = 'i', ['ु'] = 'u', ['े'] = 'e', ['ो'] = 'w',
    ['ा'] = 'a', ['ै'] = 'wi', ['ौ'] = 'wu', ['’'] = 'o',

	-- independent vowel letters
	['अ'] = 'o', ['इ'] = 'i', ['उ'] = 'u', ['ए'] = 'e', ['ओ'] = 'w',
	['आ'] = 'a', ['ऐ'] = 'wi', ['औ'] = 'wu', ['अ’'] = 'o',
	-- NOTE(review): the key 'अ’' is two characters long, while export.tr looks
	-- keys up one character at a time (plus an optional nukta), so this entry
	-- does not appear to be reachable -- confirm the intent.
	
	-- the om sign
	['ॐ'] = 'om',
	
	-- chandrabindu
	['ँ'] = 'ṅ',
	
	-- anusvara
	['ं'] = 'ŋ',
	
	-- visarga
	['ः'] = 'ḥ',
	
	-- virama (produces no output of its own)
	['्'] = '',
	
	-- numerals
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
	['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
	
	-- punctuation
	['।'] = '.', -- danda
	['॥'] = '.', -- double danda
	['+'] = '', -- compound separator (dropped from the output)
	
	-- abbreviation sign
	['॰'] = '.',
}

-- Word-final consonant clusters after which the trailing schwa ('o') is NOT
-- kept, even though the last consonant of the cluster is in special_cons,
-- which would normally cause the schwa to be kept.
-- NOTE: the lookup is done on the reversed word, so every key is spelled back
-- to front: last consonant, virama, preceding consonant. E.g. the first key is
-- written 'ml' but actually applies to words ending in 'lm'. Taken together the
-- keys cover the six clusters describable by [rl][mnv], i.e. rm, rn, rv, lm,
-- ln, lv (with 'v' standing for व).
local perm_cl = {
	['म्ल'] = true, ['व्ल'] = true, ['न्ल'] = true,
	['म्र'] = true, ['व्र'] = true, ['न्र'] = true,
}

-- Character inventories. Each string is spliced into a [...] set when the
-- transliteration patterns are built, so every character in it is one member.

-- Every consonant letter that can carry an inherent vowel.
local all_cons = "कखगघङचछजझञटठडढतथदधपफबभशषसयरलवहणनम"
-- Consonants that keep a word-final schwa when they close a cluster
-- (unless the cluster is listed in perm_cl).
local special_cons = "यरलवहनम"
-- Things that count as a vowel after a consonant: the combining vowel signs,
-- plus '*', the schwa marker 'o' and the apostrophe.
local vowel = "*िुेोाैौ’o'"
-- Independent vowel letters (plus ’ and the apostrophe).
local vowel_sign = "अइउएओआऐऔअ’'"
-- NOTE: long_vowel and short_vowel are not referenced anywhere else in this file.
local long_vowel = "ाैौआऐऔ"
local short_vowel = "*िुेो’अइउएओअ'"

-- Matches, on a REVERSED word, vowel + consonant + 'o' + consonant + vowel,
-- with an optional nukta before each consonant and an optional
-- anusvara/chandrabindu before the last vowel. The four captures are everything
-- except the 'o', so substituting '%1%2%3%4' deletes that medial schwa.
local syncope_pattern = table.concat({
	'([', vowel, vowel_sign, '])',
	'(़?[', all_cons, '])',
	'o',
	'(़?[', all_cons, '])',
	'([ंँ]?[', vowel, vowel_sign, '])',
})

-- Returns `text` with its characters in reverse order. Length and indexing go
-- through mw.ustring, so the string is walked character by character rather
-- than byte by byte; a combining mark therefore ends up in front of the
-- character it originally followed.
local function rev_string(text)
	local total = mw.ustring.len(text)
	local reversed = {}
	for pos = total, 1, -1 do
		-- Character at position `pos` lands in slot 1 for the last one, 2 for
		-- the one before it, and so on.
		reversed[total - pos + 1] = mw.ustring.sub(text, pos, pos)
	end
	return table.concat(reversed)
end

-- One "word" for schwa handling: a maximal run of characters from the range
-- ऀ-ॿ, the schwa marker 'o', and apostrophes.
local word_pattern = "[ऀऀ-ॿo']+"

-- Removes the schwa markers ('o') that should not surface in a single word and
-- returns the adjusted word. Expects the word as produced by the schwa
-- insertion step in export.tr (an 'o' after every bare consonant).
local function reduce_schwas(word)
	-- Work on the reversed word so that "word-final" becomes "string-initial"
	-- and can be anchored with ^.
	word = rev_string(word)

	-- Word-final schwa: drop it, unless the final consonant is in special_cons
	-- and closes a cluster (is preceded by a virama) that is not listed in
	-- perm_cl, or the 'य[ी]' test on the last two characters matches.
	-- Captures, in reversed order: optional nukta, final consonant, the
	-- character before it, and (optionally) the one before that.
	word = gsub(word, '^o(़?)([' .. all_cons .. '])(.)(.?)', function(opt, first, second, third)
		local keep = (match(first, '[' .. special_cons .. ']') and match(second, '्') and not perm_cl[first .. second .. third])
			or match(first .. second, 'य[ी]')
		return (keep and 'o' or '') .. opt .. first .. second .. third
	end)

	-- Medial schwas: keep deleting until a pass makes no replacement, because
	-- one deletion can expose another match.
	local replaced
	repeat
		word, replaced = gsub(word, syncope_pattern, '%1%2%3%4')
	until replaced == 0

	return rev_string(word)
end

-- Transliterates `text` into Latin script.
--
-- Steps: turn the abbreviation sign into '.', insert the schwa marker 'o'
-- after every consonant that is not followed by a vowel or virama, remove the
-- schwas that should stay silent word by word, map the remaining characters
-- through `conv`, turn '*' into 'o', and normalise the result to NFC.
--
-- @param text  string to transliterate
-- @param lang  language code (not used by this function)
-- @param sc    script code (not used by this function)
-- @return      the transliterated string
function export.tr(text, lang, sc)
	-- abbreviation dot; done first so the sign does not count as part of a word
	text = gsub(text, '॰', '.')

	-- Make the inherent vowel explicit: consonant (+ optional nukta) followed
	-- by nothing vowel-like gets an 'o' appended.
	text = gsub(text, '([' .. all_cons .. ']़?)([' .. vowel .. '्]?)', function(cons, following)
		if following == '' then
			return cons .. 'o'
		end
		return cons .. following
	end)

	-- Rewrite every word exactly once, in place. (Substituting each processed
	-- word back into the whole text by pattern would also hit the same
	-- character sequence inside longer words, making a word's result depend
	-- on its neighbours.)
	text = gsub(text, word_pattern, reduce_schwas)

	-- One character (plus optional nukta) at a time; characters without an
	-- entry in conv are left as they are.
	text = gsub(text, '.़?', conv)
	text = gsub(text, '%*', 'o')
	return mw.ustring.toNFC(text)
end

return export