This module generates entry names for Arabic-language text. It is also used to generate entry names for Tajiki Arabic, Baharna Arabic, Iraqi Arabic, Hijazi Arabic, Omani Arabic, Dhofari Arabic, Tunisian Arabic, Gulf Arabic, South Levantine Arabic, North Levantine Arabic, Sudanese Arabic, Algerian Arabic, Najdi Arabic, Moroccan Arabic, Egyptian Arabic, Uzbeki Arabic, Libyan Arabic, Yemeni Arabic, North Mesopotamian Arabic, Azerbaijani, Old Anatolian Turkish, Bulgar, and Karakhanid. Preferably, the module should not be called directly from templates or other modules. To use it from a template, use {{entryname}}. Within a module, use Module:languages#Language:makeEntryName.

For testcases, see Module:ar-entryname/testcases.

Functions

makeEntryName(text, lang, sc)
Generates an entry name for a given piece of text written in the script specified by the code sc, and in the language specified by the code lang.
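When entry name generation fails, returns nil. (Note: the implementation shown below has no failure path of its own; for string input it always returns a string.)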

local export = {}

--- Build the entry (page) name for a piece of Arabic-script text.
--
-- Alif waṣl (U+0671) is normalised to a plain alif (U+0627). Tatweel (U+0640)
-- and the marks U+064B–U+0652 (fathatan, dammatan, kasratan, fatha, damma,
-- kasra, shadda, sukun), U+0670 (superscript/dagger alef) and U+0656
-- (subscript alef) are deleted.
--
-- Text that consists solely of an alif waṣl, or of exactly one of those marks
-- (optionally preceded by a tatweel), is returned unchanged; stripping it
-- would otherwise turn the character's own entry name into a plain alif or an
-- empty string.
--
-- @param text  the string to convert
-- @param lang  language code; part of the shared interface, not used here
-- @param sc    script code; part of the shared interface, not used here
-- @return the entry name (a single string)
function export.makeEntryName(text, lang, sc)
	local U = mw.ustring.char
	local alif = U(0x627)
	local waSla = U(0x671)
	local taTwiil = U(0x640)
	-- Character-class body listing every mark that gets stripped. The guard
	-- and the stripping pattern below are both built from this one string so
	-- that the two can never disagree about which marks are covered.
	local marks = U(0x64B) .. "-" .. U(0x652) .. U(0x670) .. U(0x656)

	-- A lone alif waṣl, or a lone mark (with or without a carrying tatweel),
	-- is its own entry name.
	if text == waSla or mw.ustring.find(text, "^" .. taTwiil .. "?[" .. marks .. "]$") then
		return text
	end

	-- Assign before returning: gsub also yields a match count, which must
	-- not leak out as a second return value.
	text = mw.ustring.gsub(text, waSla, alif)
	text = mw.ustring.gsub(text, "[" .. taTwiil .. marks .. "]", "")

	return text
end

return export