Module:sa-Kthi-translit


This module transliterates Sanskrit language text written in the Kaithi script, per WT:SA TR. It is also used to transliterate Old Awadhi. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:sa-Kthi-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

local export = {}

local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char

-- Kaithi consonant letters mapped to their Latin transliteration without a
-- vowel; export.tr appends either 'a' or the value of a following vowel sign.
-- Each key appears exactly once (repeated keys in a table constructor are
-- silently overwritten by the last occurrence).
local consonants = {
	['𑂍'] = 'k', ['𑂎'] = 'kh', ['𑂏'] = 'g', ['𑂐'] = 'gh', ['𑂑'] = 'ṅ',
	['𑂒'] = 'c', ['𑂓'] = 'ch', ['𑂔'] = 'j', ['𑂕'] = 'jh', ['𑂖'] = 'ñ',
	['𑂗'] = 'ṭ', ['𑂘'] = 'ṭh', ['𑂙'] = 'ḍ', ['𑂛'] = 'ḍh', ['𑂝'] = 'ṇ',
	['𑂞'] = 't', ['𑂟'] = 'th', ['𑂠'] = 'd', ['𑂡'] = 'dh', ['𑂢'] = 'n',
	['𑂣'] = 'p', ['𑂤'] = 'ph', ['𑂥'] = 'b', ['𑂦'] = 'bh', ['𑂧'] = 'm',
	['𑂨'] = 'y', ['𑂩'] = 'r', ['𑂪'] = 'l', ['𑂫'] = 'v',
	-- NOTE(review): Devanagari letter; it lies outside the Kaithi range that
	-- export.tr matches, so this entry is presumably never looked up — confirm.
	['ळ'] = 'ḷ',
	['𑂬'] = 'ś', ['𑂭'] = 'ṣ', ['𑂮'] = 's', ['𑂯'] = 'h',
	['𑂚'] = 'ṛ', ['𑂜'] = 'ṛh',
	-- ['𑂔𑂹𑂖'] = 'gy',
}

-- Dependent vowel signs and the virama, mapped to the Latin text that
-- export.tr writes after a consonant in place of the default 'a'.
local diacritics = {
	-- virama: the consonant is written with no vowel at all
	['𑂹'] = '',

	-- short vowels
	['𑂱'] = 'i',
	['𑂳'] = 'u',
	['𑂵'] = 'e',
	['𑂷'] = 'o',

	-- long vowels and vocalic r
	['𑂰'] = 'ā',
	['𑂲'] = 'ī',
	['𑂴'] = 'ū',
	['𑃂'] = 'r̥',

	-- diphthongs
	['𑂶'] = 'ai',
	['𑂸'] = 'au',
}

-- Independent i/u vowel letters written with a diaeresis. export.tr uses
-- these when the letter directly follows an inherent or independent 'a'.
local diatrema = {
	['𑂅'] = 'ï',
	['𑂇'] = 'ü',
}

-- Single-character replacements applied by export.tr to whatever remains
-- after consonant clusters have been handled.
local tt = {
	-- independent vowels: short
	['𑂃'] = 'a',
	['𑂅'] = 'i',
	['𑂇'] = 'u',
	['𑂉'] = 'e',
	['𑂋'] = 'o',
	-- independent vowels: long
	['𑂄'] = 'ā',
	['𑂆'] = 'ī',
	['𑂈'] = 'ū',
	-- independent vowels: diphthongs
	['𑂊'] = 'ai',
	['𑂌'] = 'au',

	['ॐ'] = 'om',

	-- nasalisation and aspiration marks
	['𑂀'] = 'm̐', -- chandrabindu
	['𑂁'] = 'ṃ', -- anusvara
	['𑂂'] = 'ḥ', -- visarga

	-- numerals
	['०'] = '0',
	['१'] = '1',
	['२'] = '2',
	['३'] = '3',
	['४'] = '4',
	['५'] = '5',
	['६'] = '6',
	['७'] = '7',
	['८'] = '8',
	['९'] = '9',

	-- punctuation
	['𑃀'] = '.', -- danda
	["𑃁"] = ".", -- double danda

	-- markers that are dropped from the output
	['+'] = '', -- compound separator
	['*'] = '', -- reconstructed
}

--- Transliterates Kaithi-script text into Latin script.
-- @param text the string to transliterate
-- @param lang language code; not used by this implementation
-- @param sc script code; not used by this implementation
-- @return the transliterated text, normalised to NFC
function export.tr(text, lang, sc)
	-- Pass 1: a consonant, an optional vowel sign or virama, and an optional
	-- following independent i/u letter. The vowel-sign class lists 𑃂
	-- (vocalic r) explicitly because it lies outside the 𑂰-𑂹 range; when it
	-- was missing, the sign was never consumed and the consonant received a
	-- spurious inherent 'a'.
	text = gsub(
		text,
		'([𑂍-𑂯])' ..
		'([𑂰-𑂹𑃂]?)' ..
		'([𑂅𑂇]?)',
		function(c, d, e)
			local base = consonants[c]
			if d == "" then
				-- No sign: inherent 'a'; a following i/u takes a diaeresis.
				return base .. 'a' .. (e ~= "" and diatrema[e] or "")
			end
			-- Explicit sign or virama: a following i/u is written plainly.
			return base .. diacritics[d] .. (e ~= "" and tt[e] or "")
		end)

	-- Pass 2: independent 'a' directly followed by independent i/u also
	-- takes a diaeresis on the second vowel.
	text = gsub(text, '([𑂃])([𑂅𑂇])', function(a, b) return tt[a] .. diatrema[b] end)

	-- Pass 3: map every remaining character through tt. With a table as the
	-- replacement, standard gsub semantics keep characters that have no entry.
	text = gsub(text, '.', tt)

	return toNFC(text)

end

return export