This is a private module sandbox of Theknightwho, for their own experimentation. Items in this module may be added and removed at Theknightwho's discretion; do not rely on this module's stability.


local export = {}

local append = require("Module:table").append
local codepoint = mw.ustring.codepoint
local concat = table.concat
local explode_utf8 = require("Module:string utilities").explode_utf8
local floor = math.floor
local format = string.format
local insert = table.insert
local pattern_escape = require("Module:utilities").pattern_escape

-- Serialized weight data. export.sortkey searches it with string patterns:
-- entries are delimited by "\255" bytes and each entry begins with the
-- character it describes.
local data = require("Module:User:Theknightwho/sortkey/serialized")
-- Lua pattern matching one UTF-8 encoded character: a single ASCII or lead
-- byte (%z stands for NUL) followed by any number of continuation bytes.
local UTF8_char = "[%z\1-\127\194-\244][\128-\191]*"

--- Build a hexadecimal sort key for `text`.
--
-- Every character contributes primary, secondary and tertiary weights; zero
-- weights are dropped. The key is the list of all primary weights, followed by
-- all secondary weights, followed by all tertiary weights, each written as at
-- least four lowercase hex digits and concatenated without separators.
--
-- Characters inside the hard-coded code point ranges below receive a computed
-- pair of weights. Any other character is looked up in `data`; if it has no
-- entry there it also receives a computed pair. The ranges and lead values
-- correspond to the implicit-weight rules of the Unicode Collation Algorithm
-- (UTS #10).
--
-- @param text string, or a table whose `term.term` field holds the string
-- @return string the concatenated sort key
function export.sortkey(text)
	if type(text) == "table" then
		text = text.term.term
	end

	local primary, secondary, tertiary = {}, {}, {}

	-- Append `weight` to `level`, ignoring zero weights.
	local function push(level, weight)
		if weight ~= 0 then
			insert(level, weight)
		end
	end

	local function insert_weights(w1, w2, w3)
		push(primary, w1)
		push(secondary, w2)
		push(tertiary, w3)
	end

	-- Computed weights: a lead element carrying the secondary/tertiary
	-- weights, then a primary-only element holding the low 15 bits of
	-- `offset` with the top bit set.
	local function insert_implicit(lead, offset)
		insert_weights(lead, 0x20, 2)
		insert_weights(offset % 0x8000 + 0x8000, 0, 0)
	end

	for _, char in ipairs(explode_utf8(text)) do
		local cp = codepoint(char)
		if (cp >= 0x17000 and cp <= 0x18AFF) or (cp >= 0x18D00 and cp <= 0x18D8F) then
			-- Tangut, Tangut Components and Tangut Supplement.
			insert_implicit(0xFB00, cp - 0x17000)
		elseif cp >= 0x1B170 and cp <= 0x1B2FF then
			-- Nushu.
			insert_implicit(0xFB01, cp - 0x1B170)
		elseif cp >= 0x18B00 and cp <= 0x18CFF then
			-- Khitan Small Script.
			insert_implicit(0xFB02, cp - 0x18B00)
		elseif (cp >= 0x4E00 and cp <= 0x9FFF) or (cp >= 0xF900 and cp <= 0xFAFF) then
			-- CJK Unified Ideographs and CJK Compatibility Ideographs blocks.
			insert_implicit(0xFB40 + floor(cp / 0x8000), cp)
		elseif (cp >= 0x3400 and cp <= 0x4DBF) or (cp >= 0x20000 and cp <= 0x2A6DF) or (cp >= 0x2A700 and cp <= 0x2EBEF) or (cp >= 0x30000 and cp <= 0x323AF) then
			-- CJK Unified Ideographs extension blocks.
			insert_implicit(0xFB80 + floor(cp / 0x8000), cp)
		else
			-- A Lua 5.1 pattern cannot contain a literal NUL, so NUL is
			-- matched with the %z class. The substitution must bypass
			-- pattern_escape: escaping "%z" would turn it into a search for
			-- the literal two-character text "%z".
			local needle = char == "\0" and "%z" or pattern_escape(char)
			local char_data = data:match("\255(" .. needle .. "[^\255]+)\255")
			if char_data then
				-- Each weight triple follows a \253 or \254 marker byte and
				-- is stored as three UTF-8 encoded code points.
				for w1, w2, w3 in char_data:gmatch("[\253\254](" .. UTF8_char .. ")(" .. UTF8_char .. ")(" .. UTF8_char .. ")") do
					insert_weights(codepoint(w1), codepoint(w2), codepoint(w3))
				end
			else
				-- No entry in the data: fall back to computed weights.
				insert_implicit(0xFBC0 + floor(cp / 0x8000), cp)
			end
		end
	end

	local key = append(primary, secondary, tertiary)
	for i, weight in ipairs(key) do
		key[i] = format("%04x", weight)
	end
	return concat(key)
end

--- Sort the sequence `t` in place by the sort key of its elements.
--
-- Keys come from export.sortkey and are compared as plain strings. Each key
-- is cached, so it is computed at most once per distinct element no matter
-- how many comparisons table.sort performs.
--
-- @param t table sequence of values accepted by export.sortkey
-- @return table the same table `t`, now sorted
function export.sort(t)
	local keys = {}

	-- Return the cached sort key for `item`, computing it on first use.
	local function key_of(item)
		local key = keys[item]
		if key == nil then
			key = export.sortkey(item)
			keys[item] = key
		end
		return key
	end

	table.sort(t, function(a, b)
		return key_of(a) < key_of(b)
	end)
	return t
end

return export