This module will sort text in the Kana script. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:Hrkt-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code `sc` and the language specified by the code `lang`.
When the sort fails, returns nil.

-- Shared helper module; the functions aliased below come from it.
local m_ja = require("Module:ja")

-- NOTE(review): kata_to_hira is not referenced in the visible part of this file; confirm it is unused before removing.
local kata_to_hira = m_ja.kata_to_hira
local normalize_kana = m_ja.normalize_kana
local umatch = mw.ustring.match

-- Lookup tables used by makeSortKey: `spaces` (replacement table for separator characters) and `voicing` (number of apostrophes added per voicing mark).
local data = mw.loadData("Module:Hrkt-sortkey/data")

-- Table of functions returned by this module.
local export = {}

-- Build a sortkey from kana text.
-- The input may freely mix hiragana and katakana; no conversion between the two is done here. Callers that need a single syllabary should go through [[Module:Hira-sortkey]] or [[Module:Kana-sortkey]], which in turn call this module.
-- `lang` and `sc` are accepted for interface compatibility but are not read by this function.
-- Returns the sortkey string: the processed text followed by one trailing apostrophe per removed dakuten and two per removed handakuten.
function export.makeSortKey(text, lang, sc)
	-- Long vowel marks and iteration marks are normalized first.
	local normalized = normalize_kana(text)

	-- Turn middle dots, double hyphens and fullwidth equals signs into spaces.
	-- The byte classes are deliberately loose; gsub keeps any match that has no entry in data.spaces, so only the listed characters are actually replaced.
	local spaced = normalized:gsub("[\227\239][\130\131\188][\157\160\187]", data.spaces)
	-- If nothing but whitespace is left, discard the replacement and keep the normalized text.
	if umatch(spaced, "^%s+$") then
		spaced = normalized
	end

	-- Strip combining dakuten (U+3099) and handakuten (U+309A), accumulating how many apostrophes they are worth.
	local apostrophes = 0
	local stripped = spaced:gsub("\227\130[\153\154]", function(mark)
		apostrophes = apostrophes + data.voicing[mark]
		return ""
	end)

	if apostrophes > 0 then
		return stripped .. ("'"):rep(apostrophes)
	end
	return stripped
end

return export