Module:abq-translit

This module will transliterate Abaza language text per WT:ABQ TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:abq-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang. (This implementation accepts both codes for interface compatibility but does not read them.)
When the transliteration fails, returns nil.

local m_str_utils = require("Module:string utilities")

local gmatch = m_str_utils.gmatch
local gsub = m_str_utils.gsub
local lower = m_str_utils.lower
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char

-- Combining diacritics (U+0300 grave, U+0301 acute, U+0302 circumflex,
-- U+030C caron, U+0323 dot below). They are matched as separate characters
-- once the text has been decomposed with toNFD.
local GRAVE = u(0x300)
local ACUTE = u(0x301)
local CIRC = u(0x302)
local CARON = u(0x30C)
local DOTBELOW = u(0x323)

-- Table returned at the end of the module; public functions are attached to it.
local export = {}

-- Single-character Cyrillic -> Latin map. It is consulted last, one character
-- at a time, after the contextual rules and the digraph tables have run.
local tt = {
	-- Lowercase letters.
	["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["д"] = "d", ["е"] = "e",
	["ё"] = "jo", ["ж"] = "ž", ["з"] = "z", ["и"] = "i", ["й"] = "j", ["к"] = "k",
	["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "p", ["р"] = "r",
	["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "x", ["ц"] = "c",
	["ч"] = "ć", ["ш"] = "š", ["щ"] = "ś", ["ъ"] = "ʔ", ["ы"] = "ə", ["ь"] = "ʲ",
	["э"] = "e", ["ю"] = "ju", ["я"] = "ja",
	-- Uppercase letters.
	["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Д"] = "D", ["Е"] = "E",
	["Ё"] = "Jo", ["Ж"] = "Ž", ["З"] = "Z", ["И"] = "I", ["Й"] = "J", ["К"] = "K",
	["Л"] = "L", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "P", ["Р"] = "R",
	["С"] = "S", ["Т"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "X", ["Ц"] = "C",
	["Ч"] = "Ć", ["Ш"] = "Š", ["Щ"] = "Ś", ["Ъ"] = "Ɂ", ["Ы"] = "Ə", ["Ь"] = "ʲ",
	["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja"
}

-- Two-letter Cyrillic sequences that transliterate as a unit. Every key ends
-- in a lowercase letter; palochka always appears in its lowercase form "ӏ".
local digraphs = {
	-- Lowercase first letter.
	["гъ"] = "ɣ",
	["гӏ"] = "ʻ",
	["жь"] = "ź",
	["къ"] = "qʼ",
	["кӏ"] = "kʼ",
	["пӏ"] = "pʼ",
	["тл"] = "tˡ",
	["тӏ"] = "tʼ",
	["фӏ"] = "fʼ",
	["хъ"] = "q",
	["хӏ"] = "ḥ",
	["цӏ"] = "cʼ",
	["чӏ"] = "ćʼ",
	["шӏ"] = "čʼ",
	-- Capitalised first letter.
	["Гъ"] = "Ɣ",
	["Гӏ"] = "ʻ",
	["Жь"] = "Ź",
	["Къ"] = "Qʼ",
	["Кӏ"] = "Kʼ",
	["Пӏ"] = "Pʼ",
	["Тл"] = "Tˡ",
	["Тӏ"] = "Tʼ",
	["Фӏ"] = "Fʼ",
	["Хъ"] = "Q",
	["Хӏ"] = "Ḥ",
	["Цӏ"] = "Cʼ",
	["Чӏ"] = "Ćʼ",
	["Шӏ"] = "Čʼ"
}

-- Digraphs beginning with "л". They live in their own table, processed after
-- `digraphs`, to prevent overlapping substitutions.
local digraphs2 = {
	["ль"] = "lᶻ",
	["лӏ"] = "lˢʼ",
	["Ль"] = "Lᶻ",
	["Лӏ"] = "Lˢʼ"
}

-- Context-sensitive rewrites applied, in this exact order, before any table
-- lookup. Each entry is { pattern, replacement }. The replacements deliberately
-- mix Latin "j"/"w" with Cyrillic letters; the Cyrillic part is converted later.
local contextual_rules = {
	-- Fold capital palochka and its look-alikes (Latin "I" and "l", Cyrillic "І")
	-- into lowercase palochka, the only form the digraph tables contain.
	{ "[IlІӀ]", "ӏ" },
	-- Lowercase "е" gains "j" at the start of the text and after a vowel,
	-- whitespace or punctuation; capital "Е" does so at the start of the text
	-- and after whitespace or punctuation.
	{ "^е", "jе" },
	{ "^Е", "Jе" },
	{ "([аеёиоуыэюяАЕЁИОУЫЭЮЯ%s%p])е", "%1jе" },
	{ "([%s%p])Е", "%1Jе" },
	-- "у"/"У" directly before а, и, о, у, ы or э, and lowercase "у" directly
	-- after any vowel letter, are written "w"/"W".
	{ "у([аиоуыэ])", "w%1" },
	{ "У([аиоуыэ])", "W%1" },
	{ "([аеёиоуыэюяАЕЁИОУЫЭЮЯ])у", "%1w" },
	-- Lowercase "в" after a consonant letter, "ъ", "ь" or palochka becomes "ʷ".
	{ "([бгджзклмнпрстфхцчшщъьӏБГДЖЗКЛМНПРСТФХЦЧШЩЪЬӀ])в", "%1ʷ" },
}

--- Transliterates Cyrillic text into Latin script using the module's tables.
-- @param text the string to transliterate
-- @param lang language code; accepted but not read by this function
-- @param sc script code; accepted but not read by this function
-- @return the transliterated string, as returned by toNFC
function export.tr(text, lang, sc)
	for _, rule in ipairs(contextual_rules) do
		text = gsub(text, rule[1], rule[2])
	end

	-- Multi-letter sequences next. All of `digraphs` is applied before any of
	-- `digraphs2`, so the two sets never compete for the same letter.
	for _, mapping in ipairs({ digraphs, digraphs2 }) do
		for cyrillic, latin in pairs(mapping) do
			text = gsub(text, cyrillic, latin)
		end
	end

	-- Remaining characters are looked up individually in `tt`.
	text = gsub(text, ".", tt)

	-- Move the apostrophe behind a following "ʲ" or "ʷ", then decompose so that
	-- base letters and combining diacritics become separate characters.
	text = gsub(text, "ʼʲ", "ʲʼ")
	text = gsub(text, "ʼʷ", "ʷʼ")
	text = toNFD(text)

	-- For a doubled letter whose two halves carry the same diacritic, modifier
	-- letters and apostrophe, keep the modifiers and apostrophe on the second
	-- half only. Capture 1 is the optional combining diacritic, capture 2 the
	-- optional modifier letters and apostrophe; the pattern tail is the same for
	-- every letter, so it is built once.
	local marks = "([" .. GRAVE .. ACUTE .. CIRC .. CARON .. DOTBELOW .. "]?)([ʲˡˢʷᶻ]?[ʲʷ]?ʼ?)"
	for first in gmatch("abcdefghijklmnopqrstuvxzəɣʔABCDEFGHIJKLMNOPQRSTUVXZƏƔɁ", ".") do
		local second = lower(first)
		text = gsub(text, first .. marks .. second .. "%1%2", first .. "%1" .. second .. "%1%2")
	end

	-- Drop "ʲ" next to "j"/"J" and "ʷ" next to "w"/"W", then recompose.
	text = gsub(text, "ʲ?([Jj])ʲ?", "%1")
	text = gsub(text, "ʷ?([Ww])ʷ?", "%1")
	return toNFC(text)
end

return export