This module will transliterate Ubykh language text per WT:UBY TR. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:uby-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates the given text, which is written in the script identified by the code sc and in the language identified by the code lang.
Returns nil if the transliteration fails.

local u = require("Module:string/char")

-- Table of functions exported by this module.
local export = {}

-- Combining diacritical marks (U+0300, U+0301, U+0302, U+0306, U+030C, U+0323).
local GRAVE = u(0x300)
local ACUTE = u(0x301)
local CIRC = u(0x302)
local BREVE = u(0x306)
local CARON = u(0x30C)
local DOTBELOW = u(0x323)
-- Pattern character class matching any single one of the marks above.
local accent = "[" .. table.concat({GRAVE, ACUTE, CIRC, BREVE, CARON, DOTBELOW}) .. "]"

-- Single-character transliteration map (Cyrillic letter -> Latin output).
-- export.tr passes this table as the replacement argument of a
-- per-UTF-8-character string.gsub, so characters without an entry here are
-- left unchanged.
local tt = {
	-- Lowercase letters. Note that plain к/п/т map to the ejectives kʼ/pʼ/tʼ,
	-- while ь and ә map to the modifier letters ʲ and ʷ.
	["а"] = "a", ["б"] = "b", ["в"] = "v", ["г"] = "g", ["ӷ"] = "ğ", ["ҕ"] = "ğ", ["д"] = "d", ["е"] = "e", ["ё"] = "jo", ["ж"] = "ẑ", ["з"] = "z", ["ҙ"] = "ź", ["ӡ"] = "dz", ["и"] = "i", ["й"] = "j", ["к"] = "kʼ", ["қ"] = "k", ["ҟ"] = "qʼ", ["ӄ"] = "q", ["л"] = "l", ["ԯ"] = "lˢ", ["м"] = "m", ["н"] = "n", ["о"] = "o", ["п"] = "pʼ", ["ԥ"] = "p", ["ҧ"] = "p", ["р"] = "r", ["с"] = "s", ["ҫ"] = "ś", ["т"] = "tʼ", ["ҭ"] = "t", ["у"] = "u", ["ф"] = "f", ["х"] = "x", ["ҳ"] = "h", ["ц"] = "c", ["ҵ"] = "cʼ", ["ч"] = "č", ["ҷ"] = "čʼ", ["ҽ"] = "ĉ", ["ҿ"] = "ĉʼ", ["ш"] = "ŝ", ["ы"] = "ə", ["ҩ"] = "jʷ", ["џ"] = "dẑ", ["ь"] = "ʲ", ["ә"] = "ʷ", ["э"] = "e", ["ю"] = "ju", ["я"] = "ja", ["ӏ"] = "ʔ",
	-- Uppercase counterparts; Ь and Ә still produce the (caseless) modifier
	-- letters ʲ and ʷ.
	["А"] = "A", ["Б"] = "B", ["В"] = "V", ["Г"] = "G", ["Ӷ"] = "Ğ", ["Ҕ"] = "Ğ", ["Д"] = "D", ["Е"] = "E", ["Ё"] = "Jo", ["Ж"] = "Ẑ", ["З"] = "Z", ["Ҙ"] = "Ź", ["Ӡ"] = "Dz", ["И"] = "I", ["Й"] = "J", ["К"] = "Kʼ", ["Қ"] = "K", ["Ҟ"] = "Qʼ", ["Ӄ"] = "Q", ["Л"] = "L", ["Ԯ"] = "Lˢ", ["М"] = "M", ["Н"] = "N", ["О"] = "O", ["П"] = "Pʼ", ["Ԥ"] = "P", ["Ҧ"] = "P", ["Р"] = "R", ["С"] = "S", ["Ҫ"] = "Ś", ["Т"] = "Tʼ", ["Ҭ"] = "T", ["У"] = "U", ["Ф"] = "F", ["Х"] = "X", ["Ҳ"] = "H", ["Ц"] = "C", ["Ҵ"] = "Cʼ", ["Ч"] = "Č", ["Ҷ"] = "Čʼ", ["Ҽ"] = "Ĉ", ["Ҿ"] = "Ĉʼ", ["Ш"] = "Ŝ", ["Ы"] = "Ə", ["Ҩ"] = "Jʷ", ["Џ"] = "Dẑ", ["Ь"] = "ʲ", ["Ә"] = "ʷ", ["Э"] = "E", ["Ю"] = "Ju", ["Я"] = "Ja", ["Ӏ"] = "Ɂ"
}

-- Two-character sequences (a letter followed by ь or an ASCII apostrophe) that
-- have their own transliteration. export.tr replaces each of these with
-- string.gsub before the single-character pass over tt; the table is walked
-- with pairs, so the replacement order is unspecified.
local digraphs = {
	-- Lowercase first letter.
	["б'"] = "ḅ", ["в'"] = "ṿ", ["ӷ'"] = "ğ̣", ["ҕ'"] = "ğ̣", ["жь"] = "ž", ["ӡь"] = "dź", ["ӡ'"] = "dź", ["ҟ'"] = "q̣ʼ", ["ӄ'"] = "q̣", ["ԯь"] = "lˢʼ", ["м'"] = "ṃ", ["п'"] = "p̣ʼ", ["ԥ'"] = "p̣", ["ф'"] = "fʼ", ["х'"] = "x̣", ["ць"] = "ć", ["ц'"] = "ć", ["ҵь"] = "ćʼ", ["ҵ'"] = "ćʼ", ["шь"] = "š", ["џь"] = "dž",
	-- Uppercase first letter.
	["Б'"] = "Ḅ", ["В'"] = "Ṿ", ["Ӷ'"] = "Ğ̣", ["Ҕ'"] = "Ğ̣", ["Жь"] = "Ž", ["Ӡь"] = "Dź", ["Ӡ'"] = "Dź", ["Ҟ'"] = "Q̣ʼ", ["Ӄ'"] = "Q̣", ["Ԯь"] = "Lˢʼ", ["М'"] = "Ṃ", ["П'"] = "P̣ʼ", ["Ԥ'"] = "P̣", ["Ф'"] = "Fʼ", ["Х'"] = "X̣", ["Ць"] = "Ć", ["Ц'"] = "Ć", ["Ҵь"] = "Ćʼ", ["Ҵ'"] = "Ćʼ", ["Шь"] = "Š", ["Џь"] = "Dž"
}

--- Transliterates Ubykh Cyrillic text into Latin script.
-- @param text  the string to transliterate
-- @param lang  language code; not read by this implementation
-- @param sc    script code; not read by this implementation
-- @return the transliterated string, recomposed to NFC
function export.tr(text, lang, sc)
	local ugsub = mw.ustring.gsub
	local ulower = mw.ustring.lower
	-- Byte-level pattern that matches exactly one UTF-8 encoded character.
	local one_char = "[%z\1-\127\194-\244][\128-\191]*"

	-- Fold uppercase palochka and its look-alikes (Latin "I" and "l",
	-- Cyrillic "І") into the lowercase palochka, which tt knows how to map.
	text = ugsub(text, "[IlІӀ]", "ӏ")

	-- Context-dependent glides: "у" next to a vowel becomes "w", "и" next to a
	-- vowel becomes "j", and "е" after a vowel gains a leading "j". The rules
	-- run in the listed order and need the Unicode-aware gsub because they use
	-- multi-byte characters inside character classes.
	local glide_rules = {
		{"у([аеиоуыэ])", "w%1"},
		{"У([аеиоуыэ])", "W%1"},
		{"([аеёиоуыэюяАЕЁИОУЫЭЮЯ])у", "%1w"},
		{"и([аеиоуыэ])", "j%1"},
		{"И([аеиоуыэ])", "J%1"},
		{"([аеёиоуыэюяАЕЁИОУЫЭЮЯ])и", "%1j"},
		{"([аеёиоуыэюяАЕЁИОУЫЭЮЯ])е", "%1jе"},
	}
	for _, rule in ipairs(glide_rules) do
		text = ugsub(text, rule[1], rule[2])
	end

	-- Two-character sequences are replaced first, then every remaining
	-- character is looked up individually in tt.
	for cyrillic, latin in pairs(digraphs) do
		text = string.gsub(text, cyrillic, latin)
	end
	text = string.gsub(text, one_char, tt)

	-- Put the apostrophe after a following ʲ or ʷ, then decompose so that
	-- diacritics become separate combining characters.
	text = ugsub(text, "ʼʲ", "ʲʼ")
	text = ugsub(text, "ʼʷ", "ʷʼ")
	text = mw.ustring.toNFD(text)

	-- For a doubled letter whose two halves carry the same diacritics and the
	-- same modifier letters/apostrophe, keep the modifiers only on the second
	-- half for readability.
	local diacritics = "(" .. accent .. "?" .. accent .. "?)"
	local modifiers = "([ʲˢʷ]?[ʲʷ]?ʼ?)"
	for first in string.gmatch("abcdefghijklmnopqrstuvxzəʔABCDEFGHIJKLMNOPQRSTUVXZƏɁ", one_char) do
		local second = ulower(first)
		local doubled = first .. diacritics .. modifiers .. second .. "%1%2"
		local simplified = first .. "%1" .. second .. "%1%2"
		text = ugsub(text, doubled, simplified)
	end

	-- Drop ʲ adjacent to j and ʷ adjacent to w, then recompose.
	text = ugsub(text, "ʲ?([Jj])ʲ?", "%1")
	text = ugsub(text, "ʷ?([Ww])ʷ?", "%1")
	return mw.ustring.toNFC(text)
end

-- Hand the module table (with its tr function) back to whoever loads this module.
return export