Module:sa-Kthi-translit
- The following documentation is generated by Module:documentation/functions/translit. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
This module will transliterate Sanskrit language text per WT:SA TR. It is also used to transliterate Old Awadhi.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:sa-Kthi-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
local export = {}
local m_str_utils = require("Module:string utilities")
local gsub = m_str_utils.gsub
local match = m_str_utils.match
local toNFC = mw.ustring.toNFC
local u = m_str_utils.char
-- Kaithi consonant letters mapped to their Latin transliteration, without
-- the inherent vowel. export.tr indexes this table with the single consonant
-- character captured by its pattern and appends the vowel itself.
-- Each key is listed exactly once.
local consonants = {
	-- velars
	['𑂍'] = 'k', ['𑂎'] = 'kh', ['𑂏'] = 'g', ['𑂐'] = 'gh', ['𑂑'] = 'ṅ',
	-- palatals
	['𑂒'] = 'c', ['𑂓'] = 'ch', ['𑂔'] = 'j', ['𑂕'] = 'jh', ['𑂖'] = 'ñ',
	-- retroflexes
	['𑂗'] = 'ṭ', ['𑂘'] = 'ṭh', ['𑂙'] = 'ḍ', ['𑂛'] = 'ḍh', ['𑂝'] = 'ṇ',
	-- dentals
	['𑂞'] = 't', ['𑂟'] = 'th', ['𑂠'] = 'd', ['𑂡'] = 'dh', ['𑂢'] = 'n',
	-- labials
	['𑂣'] = 'p', ['𑂤'] = 'ph', ['𑂥'] = 'b', ['𑂦'] = 'bh', ['𑂧'] = 'm',
	-- semivowels and liquids
	-- NOTE(review): 'ळ' is a Devanagari letter and lies outside the consonant
	-- range matched in export.tr, so this entry looks unreachable — confirm.
	['𑂨'] = 'y', ['𑂩'] = 'r', ['𑂪'] = 'l', ['𑂫'] = 'v', ['ळ'] = 'ḷ',
	-- sibilants and h
	['𑂬'] = 'ś', ['𑂭'] = 'ṣ', ['𑂮'] = 's', ['𑂯'] = 'h',
	-- flapped retroflexes
	['𑂚'] = 'ṛ', ['𑂜'] = 'ṛh',
	-- ['𑂔𑂹𑂖'] = 'gy',
}
-- Dependent signs that may follow a consonant, mapped to the Latin text that
-- replaces the consonant's inherent "a".
local diacritics = {
	["𑂰"] = "ā",
	["𑂱"] = "i",
	["𑂲"] = "ī",
	["𑂳"] = "u",
	["𑂴"] = "ū",
	["𑂵"] = "e",
	["𑂶"] = "ai",
	["𑂷"] = "o",
	["𑂸"] = "au",
	-- virama: maps to the empty string, so the consonant is emitted bare
	["𑂹"] = "",
	["𑃂"] = "r̥",
}
-- Independent i/u letters rendered with a dieresis. export.tr uses these when
-- the letter directly follows an inherent "a" or the independent letter a.
local diatrema = {
	["𑂅"] = "ï",
	["𑂇"] = "ü",
}
-- Single-character replacements applied in the final pass of export.tr:
-- independent vowels, nasal/aspiration signs, digits, punctuation and the
-- markup characters that are dropped from the output.
local tt = {
	-- independent vowels
	["𑂃"] = "a",
	["𑂄"] = "ā",
	["𑂅"] = "i",
	["𑂆"] = "ī",
	["𑂇"] = "u",
	["𑂈"] = "ū",
	["𑂉"] = "e",
	["𑂊"] = "ai",
	["𑂋"] = "o",
	["𑂌"] = "au",
	["ॐ"] = "om",

	-- signs
	["𑂀"] = "m̐", -- chandrabindu
	["𑂁"] = "ṃ", -- anusvara
	["𑂂"] = "ḥ", -- visarga

	-- numerals
	["०"] = "0",
	["१"] = "1",
	["२"] = "2",
	["३"] = "3",
	["४"] = "4",
	["५"] = "5",
	["६"] = "6",
	["७"] = "7",
	["८"] = "8",
	["९"] = "9",

	-- punctuation
	["𑃀"] = ".", -- danda
	["𑃁"] = ".", -- double danda

	-- characters removed from the output
	["+"] = "", -- compound separator
	["*"] = "", -- reconstruction marker
}
--- Transliterate Kaithi-script text into Latin.
-- Works in three passes:
--   1. each consonant, together with an optional dependent sign and an
--      optional following independent i/u letter, is converted as one unit;
--   2. an independent i/u directly after the independent letter a is written
--      with a dieresis;
--   3. every remaining character is looked up in `tt` (presumably characters
--      with no entry are left as they are — this relies on the table-replacement
--      behaviour of the imported gsub).
-- @param text string to transliterate
-- @param lang language code (not used by this function)
-- @param sc script code (not used by this function)
-- @return the transliterated text, passed through toNFC
function export.tr(text, lang, sc)
	text = gsub(
		text,
		'([𑂍-𑂯])' ..
		-- The dependent-sign class must also list 𑃂 explicitly: it is a key of
		-- `diacritics` but lies outside the 𑂰-𑂹 range, so without it the
		-- consonant would get an inherent "a" and the sign would be left over.
		'([𑂰-𑂹𑃂]?)' ..
		'([𑂅𑂇]?)',
		function(c, d, e)
			local has_sign = d ~= ""
			-- Without a dependent sign the consonant carries its inherent "a".
			local syllable = consonants[c] .. (has_sign and diacritics[d] or 'a')
			if e == "" then
				return syllable
			end
			if has_sign then
				-- After an explicit sign (or virama) the vowel letter is plain.
				return syllable .. tt[e]
			end
			-- Directly after an inherent "a" the vowel letter takes a dieresis.
			return syllable .. diatrema[e]
		end)
	-- Adjacent vowel letters needing dieresis
	text = gsub(text, '([𑂃])([𑂅𑂇])', function(a, b) return tt[a] .. diatrema[b] end)
	text = gsub(text, '.', tt)
	return toNFC(text)
end
return export