Module:Lana-sortkey

This module will sort text in the Tai Tham script. It is used to sort Khün, Northern Thai, and Yong. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:Lana-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

local export = {}
local u = mw.ustring.char

-- Inclusive code point range of the "minor marks" that makeSortKey strips
-- from the text and re-encodes as trailing weight digits.
local MINOR_FIRST, MINOR_LAST = 0x1A73, 0x1A7C

-- Capturing character class that matches exactly one minor mark.
local minorMarkSet = "([" .. u(MINOR_FIRST) .. "-" .. u(MINOR_LAST) .. "])"

-- Weight digit for each minor mark, keyed by the mark itself. The weights are
-- listed in code point order starting at MINOR_FIRST; the first two marks
-- share the weight "0".
local minorMarks = {}
do
	local weights = { "0", "0", "1", "2", "3", "4", "5", "6", "7", "8" }
	for offset = 0, MINOR_LAST - MINOR_FIRST do
		minorMarks[u(MINOR_FIRST + offset)] = weights[offset + 1]
	end
end

-- Pattern -> replacement rules for single characters. The first rule is a
-- character class whose matches are deleted; every other rule expands one
-- character into the sequence it is sorted as.
local monographs = {
	["[%pᪧ]"] = "",
	["ᩓ"] = "ᩃᩯ",
	["ᩔ"] = "ᩈ᩠ᩈ",
	["ᩕ"] = "᩠ᩁ",
	["ᩖ"] = "᩠ᩃ",
	["ᩗ"] = "᩠ᩃ",
	["ᩘ"] = "᩠ᨦ",
	["ᩙ"] = "᩠ᨦ",
	["ᩚ"] = "᩠ᨻ",
	["ᩜ"] = "᩠ᨾ",
	["ᩝ"] = "᩠ᨷ",
	["ᩞ"] = "᩠ᩈ",
	["ᩤ"] = "ᩣ",
}

-- Pattern -> replacement rules for a two-character sequence: the second
-- character is rewritten according to which range the captured first
-- character falls in, and the first character (%1) is kept.
local digraphs = {
	["([ᨭ-ᨱ])ᩛ"] = "%1᩠ᨮ",
	["([ᨲ-ᨶ])ᩛ"] = "%1᩠ᨳ",
	["([ᨷ-ᨾ])ᩛ"] = "%1᩠ᨻ",
}

--- Build the sort key for a piece of Tai Tham text.
-- Every minor mark (anything matched by minorMarkSet) is removed from the
-- text and its weight from minorMarks is remembered in order of appearance.
-- The digraph rules and then the monograph rules are applied to what is
-- left, and the collected weights are appended to the result.
-- @param text the string to build a key for
-- @param lang language code (not consulted by this implementation)
-- @param sc script code (not consulted by this implementation)
-- @return the rewritten text followed by the minor-mark weight digits
function export.makeSortKey(text, lang, sc)
	local gsub = mw.ustring.gsub
	local weights = {}

	-- One pass both deletes each minor mark and records its weight.
	local stripped = gsub(text, minorMarkSet, function(mark)
		weights[#weights + 1] = minorMarks[mark]
		return ""
	end)

	-- Rule tables in application order: digraphs first, then monographs.
	for _, rules in ipairs({ digraphs, monographs }) do
		for pattern, replacement in pairs(rules) do
			stripped = gsub(stripped, pattern, replacement)
		end
	end

	return stripped .. table.concat(weights)
end

return export