-- Module:wlm-sortkey


-- Table of public functions; it is returned at the end of the module.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Private-use character (U+F000) appended to the base letter of every
-- multi-letter sequence below. Presumably this makes such a sequence sort
-- after all words that begin with the plain base letter -- confirm against
-- the collation used by the caller.
local a = u(0xF000)

-- Contents of a character class (wrapped in "[...]" by makeSortKey):
-- the range U+0300-U+0302 (grave, acute, circumflex), U+0308 (diaeresis)
-- and the apostrophe.
local remove_diacritics = u(0x0300) .. "-" .. u(0x0302) .. u(0x0308) .. "'"

-- Single characters that are replaced by another single character.
local oneChar = {
	k = "c",
}

-- Two-letter sequences, each mapped to its first letter plus the marker.
local twoChars = {
	ch = "c" .. a,
	dd = "d" .. a,
	ff = "f" .. a,
	ll = "l" .. a,
	ph = "p" .. a,
	rh = "r" .. a,
	th = "t" .. a,
}

-- Three-letter sequences, mapped to a base letter plus the marker.
local threeChars = {
	ngh = "g" .. a,
}

--- Build the sort key for a piece of text.
-- The text is lowercased, the multi-letter sequences listed in `threeChars`
-- and `twoChars` are replaced, single characters are mapped through
-- `oneChar`, the code points listed in `remove_diacritics` are stripped,
-- and the result is uppercased.
-- @param text string to convert
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the uppercased sort key
function export.makeSortKey(text, lang, sc)
	local ustring = mw.ustring
	local key = ustring.lower(text)

	-- Three-letter sequences are replaced first, then two-letter ones.
	for _, mapping in ipairs({ threeChars, twoChars }) do
		for from, to in pairs(mapping) do
			key = ustring.gsub(key, from, to)
		end
	end

	-- Look every character up in oneChar; characters without an entry are
	-- left as they are.
	key = ustring.gsub(key, ".", oneChar)

	-- Decompose so that combining marks become separate code points, drop
	-- the unwanted ones, then recompose what is left.
	local decomposed = ustring.toNFD(key)
	local stripped = ustring.gsub(decomposed, "[" .. remove_diacritics .. "]", "")
	local recomposed = ustring.toNFC(stripped)

	return ustring.upper(recomposed)
end

-- Hand the table of public functions back to whatever loads this module.
return export