Module:User:Erutuon/sandbox

This module contains a lot of random functions created by User:Erutuon. Some have served a purpose, some have continuing use, and others were one-time experiments.

Testcases

pattern ـَ
a
ـَ
a
ـَ
a
root ف
f
ع
ʕ
ل
l
pattern تَ
ta
ـِي
ī
root ف
f
ع
ʕ
ل
l
pattern ـَ
a
ـَ
a
ـَ
a
root ف
f
ع
ʕ
ع
ʕ
ل
l
pattern ـَ
a
ـَا
ā
root ف
f
ع
ʕ
ع
ʕ
ل
l
pattern اِ
i
تَ
ta
ـَ
a
ـَ
a
root ف
f
ع
ʕ
ل
l
pattern يَسْتَ
yasta
ـِ
i
ـُ
u
root ف
f
ع
ʕ
ل
l
the rest
  • U(0x65, 0x78, 0x61, 0x6D, 0x70, 0x6C, 0x65)
ء ــَ ل ــِ ف
consonant vowel consonant vowel consonant

local export = {}

-- Short local names for the Unicode-aware string functions of Scribunto's
-- mw.ustring library, which operate on code points rather than bytes.
local gsub = mw.ustring.gsub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local match = mw.ustring.match
local sub = mw.ustring.sub
-- U(...) builds a string from one or more code points.
local U = mw.ustring.char

-- Returns the one-character substring of `term` at position `index`
-- (positions are counted by the file-level `sub` alias).
local function get(term, index)
	local character = sub(term, index, index)
	return character
end

-- Arabic combining marks, built from their code points.
local fatHa = U(0x64E)
local fatHatan = U(0x64B)
local kasratan = U(0x64D)
local Dammatan = U(0x64C)
local kasra = U(0x650)
local Damma = U(0x64F)
local sukuun = U(0x652)
local shadda = U(0x651)
-- Body of a character class holding the three short vowels and their
-- nunated (-an) counterparts.
local vowel_diacritics = fatHa .. kasra .. Damma .. fatHatan .. kasratan .. Dammatan
-- Pattern matching exactly one of the vowel marks above.
local vowel = "[" .. vowel_diacritics .. "]"
-- Pattern matching one vowel mark, sukuun, or shadda.
local diacritic = "[" .. vowel_diacritics .. sukuun .. shadda .. "]"
-- Letters, written as literals.
local alif = "ا"
local waaw = "و"
local yaa = "ي"
local alif_maqSuura = "ى"
local madda = "آ"
local waSla = "ٱ"
-- Hamza and the letter forms that carry it; convertHamza reduces the
-- carrier forms to the bare hamza.
local hamza = "ء"
local alif_hamza_above = "أ"
local alif_hamza_below = "إ"
local yaa_hamza = "ئ"
local waaw_hamza = "ؤ"
local taa_marbuuTa = "ة"
-- U+0640; prefixed to vowel marks and diacritic-initial fragments before
-- they are displayed on their own.
local taTwiil = U(0x640)

-- Graphemes recognized by this module, grouped by category. Each entry is a
-- complete grapheme: a single letter or mark, or (for long vowels and
-- nunation) a mark together with the letter written next to it.
-- NOTE(review): "ه" and taa_marbuuTa do not appear in any list, so
-- disintegrate raises an error on them -- confirm whether that is intended.
local graphemes = {
	consonant = {
		"ب", "ت", "ث", "ج", "ح", "خ", "د", "ذ", "ر",
		"ز", "س", "ش", "ص", "ض", "ط", "ظ", "ع", "غ",
		"ق", "ف", "ل", "ك", "م", "ن", "و", "ي", "ء",
	},
	vowel = {
		-- a, ā (written with alif or alif maqSuura)
		fatHa,
		fatHa .. alif,
		fatHa .. alif_maqSuura,
		-- u, ū
		Damma,
		Damma .. waaw,
		-- i, ī
		kasra,
		kasra .. yaa,
		-- nunation; fatHatan is accepted on either side of its alif
		fatHatan,
		fatHatan .. alif,
		alif .. fatHatan,
		Dammatan,
		kasratan,
	},
	diacritic = { sukuun, shadda },
	sequence = { madda },
	other = { waSla },
}

-- Lookup table from grapheme to its metadata; currently the only field is
-- `type`, the name of the category the grapheme is listed under in
-- `graphemes`. Declared local so that loading this module does not create a
-- global variable named `data`.
local data = {}

for kind, list in pairs(graphemes) do
	-- Each category list is a plain sequence.
	for _, grapheme in ipairs(list) do
		data[grapheme] = { type = kind }
	end
end

-- Returns the category name recorded in `data` for the grapheme `char`,
-- or nil when the grapheme is unknown or has no type.
local function getType(char)
	local entry = data[char]
	return entry and entry.type or nil
end

-- Normalizes hamza spellings in `term`: every hamza-carrying letter becomes
-- the bare hamza, and madda is expanded to hamza + fatHa + alif.
local function convertHamza(term)
	local carriers = "[" .. alif_hamza_above .. alif_hamza_below .. yaa_hamza .. waaw_hamza .. "]"
	local bare = gsub(term, carriers, hamza)
	local expanded = gsub(bare, madda, hamza .. fatHa .. alif)
	return expanded
end

-- Appends `chars` to `list` and strips as many characters as `chars`
-- contains from the front of `term`.
-- Returns the (mutated) list and the shortened term.
local function add(list, chars, term)
	table.insert(list, chars)
	-- Local on purpose: the previous bare assignment created a global
	-- variable named `length`.
	local length = mw.ustring.len(chars)
	return list, sub(term, length + 1)
end

-- Splits an Arabic `term` into a list of graphemes.
-- Each list entry is a table { text, type = category }. A consonant followed
-- by shadda yields two identical consonant entries; vowels are stored with a
-- leading taTwiil; sukuun is dropped. Raises an error on any character that
-- is not covered by the cases below.
local function disintegrate(term)
	local pieces = {}
	local rest = convertHamza(term)

	while mw.ustring.len(rest) > 0 do
		local first = get(rest, 1) or ""
		local second = get(rest, 2) or ""
		local pair = sub(rest, 1, 2) or ""

		if mw.ustring.len(first) > 1 then
			error(first .. " isn't a single character.")
		end

		local first_type = getType(first)
		-- Number of characters to remove from the front of `rest`.
		local consumed

		if first_type == "consonant" then
			table.insert(pieces, { first, type = first_type })
			if second == shadda then
				-- Geminated consonant: list it twice and skip the shadda.
				table.insert(pieces, { first, type = first_type })
				consumed = 2
			else
				consumed = 1
			end
		elseif getType(pair) == "vowel" then
			-- Two-character vowel grapheme (mark plus letter).
			table.insert(pieces, { taTwiil .. pair, type = getType(pair) })
			consumed = 2
		elseif first_type == "vowel" then
			table.insert(pieces, { taTwiil .. first, type = first_type })
			consumed = 1
		elseif first == sukuun then
			consumed = 1
		elseif first == madda then
			error("Maddas should have been replaced by convertHamza.")
		else
			error("The character " .. first .. " was not recognized by the function disintegrate.")
		end

		rest = sub(rest, consumed + 1)
	end

	return pieces
end

-- Template entry point: renders the graphemes of frame.args[1] as a
-- two-row wikitable. The first row holds each grapheme tagged as Arabic
-- text, the second row the grapheme's category.
function export.printDisintegration(frame)
	local term = frame.args[1]
	local disintegration = disintegrate(term)

	-- Looked up once instead of once per table cell.
	local lang = require("Module:languages").getByCode("ar")
	local sc = require("Module:scripts").getByCode("Arab")
	local tag_text = require("Module:script utilities").tag_text

	local row1, row2 = {}, { "\n|-" }

	-- ipairs guarantees the columns come out in grapheme order; pairs makes
	-- no promise about traversal order.
	for _, character in ipairs(disintegration) do
		local text = character[1]
		if text then
			table.insert(row1, "\n| ")
			table.insert(row2, "\n| ")
			if character.type == "vowel" then
				-- NOTE(review): disintegrate already prefixes vowels with a
				-- taTwiil, so they carry two here -- confirm this is intended.
				text = taTwiil .. text
			end
			table.insert(row1, tag_text(text, lang, sc))
			table.insert(row2, character.type)
		end
	end

	return '{| class="wikitable"' .. table.concat(row1) .. table.concat(row2) .. "\n|}"
end

-- Template entry point: shows Lua source that rebuilds a string from its
-- code points.
-- Arguments (read from the frame, then from the parent frame):
--   1      the text to convert; defaults to "example"
--   func   name of the function to print; defaults to "mw.ustring.char",
--          and is forced to "U" on pages in the Module namespace
--   format "concat" prints one call per code point joined with " .. ";
--          anything else prints a single call with all code points
-- Returns the expansion of {{code|lua|...}} around the generated source.
function export.printCodepoints(frame)
	-- getParent() returns nil when there is no parent frame; fall back to
	-- an empty argument table instead of indexing nil.
	local parent = frame:getParent()
	local parent_args = parent and parent.args or {}

	local term = frame.args[1] or parent_args[1]
	local functionName = frame.args["func"] or parent_args["func"] or "mw.ustring.char"
	local format = frame.args["format"] or parent_args["format"]

	if mw.title.getCurrentTitle().nsText == "Module" then
		functionName = "U"
	end

	local concatenate = format == "concat"

	-- Local on purpose: this used to be assigned to a global.
	local codepoints = { mw.ustring.codepoint(term or "example", 1, -1) }
	local output = {}
	for _, codepoint in ipairs(codepoints) do
		local literal = string.format("0x%X", codepoint)
		if concatenate then
			literal = functionName .. "(" .. literal .. ")"
		end
		table.insert(output, literal)
	end

	local code
	if concatenate then
		code = table.concat(output, " .. ")
	else
		code = functionName .. "(" .. table.concat(output, ", ") .. ")"
	end

	return frame:expandTemplate{ title = "code", args = { "lua", code } }
end

-- "If not empty": returns `value` unchanged unless it is the empty string
-- or a lone sukuun, in which case nil is returned.
local function ine(value)
	if value ~= "" and value ~= sukuun then
		return value
	end
	return nil
end

-- Template entry point: splits an Arabic word pattern (frame.args[1],
-- default "فَعَلَ") into the placeholder root consonants ف ع ل ق and the
-- material between them.
-- Returns a link to the pattern, a comma-separated list of the pieces, and
-- a wikitable with one "pattern" row and one "root" row in which each piece
-- occupies its own column together with its transliteration.
function export.showPattern(frame)
	local lang = require("Module:languages").getByCode("ar")
	local sc = require("Module:scripts").getByCode("Arab")

	local function link(term)
		return require("Module:links").full_link{ term = term, lang = lang, sc = sc }
	end

	local function tag(term)
		if term then
			return require("Module:script utilities").tag_text(term, lang, sc)
		end
	end

	-- Transliterates `term` and strips a leading hyphen from the result.
	local function transliterate(term)
		-- Local on purpose: this used to be assigned to a global.
		local translit = lang:transliterate(term)
		if translit then
			translit = gsub(translit, "^%-", "")
		end
		return translit
	end

	local pattern = frame.args[1] or "فَعَلَ"
	-- Placeholder root consonants, in the order they are searched for.
	local basic_root = {
		"ف",
		"ع",
		"ل",
		"ق",
	}

	local elements = {}
	local patternTable = {}
	local rootTable = {}
	-- Move each shadda in front of the vowel that follows it, so that it sits
	-- directly after its consonant.
	local workingStr = gsub(pattern, "(" .. vowel .. ")" .. shadda, shadda .. "%1")

	-- Adds one column to both table rows. `root_cell` and `pattern_cell` are
	-- tables of the form { text, tr = transliteration } or nil; the row that
	-- has no content for this column receives an empty cell.
	local function insert(root_cell, pattern_cell)
		local has_root = root_cell and root_cell[1]
		local has_pattern = pattern_cell and pattern_cell[1]
		if not ( has_root or has_pattern ) then
			return
		end

		if has_root then
			table.insert(rootTable, tag(root_cell[1]) .. ( root_cell.tr and "<br>" .. root_cell.tr or "" ) )
		else
			table.insert(rootTable, "")
		end
		if has_pattern then
			table.insert(patternTable, tag(pattern_cell[1]) .. ( pattern_cell.tr and "<br>" .. pattern_cell.tr or "" ) )
		else
			table.insert(patternTable, "")
		end
	end

	-- ipairs, because the root consonants must be consumed in order; pairs
	-- gives no ordering guarantee.
	for _, root_consonant in ipairs(basic_root) do
		local _, index, before, consonant = find(
			workingStr,
			"^([^" .. root_consonant .. "]*)(" .. root_consonant .. shadda .. "?)"
			)

		if index then
			before = ine(before)
			consonant = ine(consonant)

			if before and find(before, "^" .. diacritic) then
				before = taTwiil .. before
			end

			table.insert(elements, before)
			table.insert(elements, consonant)

			local hasShadda
			if consonant then
				consonant, hasShadda = gsub(consonant, shadda, "")
				if hasShadda < 1 then
					hasShadda = false
				end
			end

			if before then
				before = gsub(before, taTwiil .. sukuun, "")
			end

			before = { before, tr = transliterate(before) }
			consonant = { consonant, tr = transliterate(consonant) }

			insert(nil, before)
			insert(consonant, nil)
			-- A geminated root consonant takes up two root columns.
			if hasShadda then
				insert(consonant, nil)
			end

			workingStr = sub(workingStr, index + 1)
		end
	end

	-- Whatever follows the last root consonant that was found.
	if workingStr and workingStr ~= "" then
		if find(workingStr, "^" .. diacritic) then
			workingStr = taTwiil .. workingStr
		end
		local tr = transliterate(workingStr)
		workingStr = tag(workingStr)
		table.insert(elements, workingStr)

		workingStr = { workingStr, tr = tr }

		insert(nil, workingStr)
	end

	-- Locals on purpose: these three used to be assigned to globals.
	local elementsList = table.concat(elements, "، ")
	local patternRow = "\n! pattern \n| " .. table.concat(patternTable, "\n| ")
	local rootRow = "\n! root \n| " .. table.concat(rootTable, "\n| ")

	return link(pattern) .. ": " .. elementsList .. '<br>' .. '\n{| class="wikitable"' .. patternRow .. '\n|-' .. rootRow .. '\n|}'
end

-- Template entry point: prints every character in a range of code points,
-- wrapping each run of characters belonging to one script in
-- <span class="SCRIPT CODE">...</span>.
-- frame.args[1] and frame.args[2] give the two ends of the range, each as a
-- character (or, when called from Lua, as a code point number). They default
-- to "ぁ" and "ー" when missing or empty, and are swapped if given in
-- descending order.
function export.printRange(frame)
	-- The arguments used to be written as `"ぁ" or frame.args[1]`, which
	-- always evaluates to the literal, so they were silently ignored.
	-- `frame` may still be omitted, as it could be before.
	local args = frame and frame.args or {}
	local start, finish = args[1], args[2]
	if start == nil or start == "" then
		start = "ぁ"
	end
	if finish == nil or finish == "" then
		finish = "ー"
	end

	if type(start) == "string" then
		start = mw.ustring.codepoint(start)
	end

	if type(finish) == "string" then
		finish = mw.ustring.codepoint(finish)
	end

	if start > finish then
		start, finish = finish, start
	end

	local m_scripts = require("Module:scripts")
	local out = {}
	-- Script code of the span that is currently open, and a character class
	-- matching that script's characters (nil when the script provides none).
	local script, scriptPattern

	for i = start, finish do
		local character = mw.ustring.char(i)

		-- Fast path: the character matches the open span's pattern.
		-- Otherwise ask for the character's script and start a new span only
		-- if it really differs. The pattern is rebuilt on every script
		-- change; it used to be built once and then went stale.
		if not ( scriptPattern and mw.ustring.match(character, scriptPattern) ) then
			local newScript = m_scripts.charToScript(i)
			if newScript ~= script then
				-- Close the previous span before opening another, so that
				-- the tags stay balanced even without a pattern.
				if script then
					table.insert(out, '</span>')
				end
				script = newScript

				local scriptObject = m_scripts.getByCode(script)
				local characters = scriptObject and scriptObject:getCharacters()
				scriptPattern = characters and "[" .. characters .. "]" or nil

				table.insert(out, '<span class="' .. script .. '">')
			end
		end

		table.insert(out, character)
	end

	if script then
		table.insert(out, '</span>')
	end

	return table.concat(out)
end

-- U+25CC, used as a visible base for combining characters.
local dottedCircle = U(0x25CC)

-- Returns `char` prefixed with a dotted circle when it is a combining
-- character, and unchanged otherwise. Returns nil when `char` is not a
-- string of exactly one character.
local function addDottedCircle(char)
	if type(char) ~= "string" or mw.ustring.len(char) ~= 1 then
		return nil
	end

	local codepoint = mw.ustring.codepoint(char)
	local combining = require("Module:Unicode data").is_combining(codepoint)

	if combining then
		return dottedCircle .. char
	end
	return char
end

-- Returns `text` with every character passed through addDottedCircle, so
-- that combining characters are shown on a dotted circle.
-- Returns nothing when `text` is not a string.
local function showCombiningChars(text)
	if type(text) ~= "string" then
		return
	end

	local marked = {}
	for char in mw.ustring.gmatch(text, ".") do
		table.insert(marked, addDottedCircle(char))
	end

	return table.concat(marked)
end

-- Template entry point: lists the tracking conditions defined in
-- Module:script utilities/tracking.
-- For every condition that is not a redirect and has a string `code`, the
-- output contains the tracked characters (combining characters shown on a
-- dotted circle) followed by a link to the pages using the tracking
-- template. The first condition of each language is preceded by a bullet
-- linking to the language's category.
function export.showConditions(frame)
	local m_languages = require("Module:languages")
	local m_scripts = require("Module:scripts")
	local tag_text = require("Module:script utilities").tag_text

	-- Three parallel lists sharing one index: `tracked` and `trackingLinks`
	-- have an entry for every listed condition, `languages` only at the
	-- index of a language's first condition.
	local languages, trackingLinks, tracked = {}, {}, {}
	local index = 1

	for code, conditions in pairs(require("Module:script utilities/tracking").allTrackingConditions) do
		local lang = m_languages.getByCode(code) or m_languages.getByCode("und")
		local sc
		if lang then
			languages[index] = lang
		end

		local path = conditions.path or code

		for _, condition in ipairs(conditions) do
			if not condition.redirect_to then
				local condition_path = condition.path or path

				-- Work on a local list rather than writing the normalized
				-- table back into the required module's data.
				local condition_chars = condition.chars
				if type(condition_chars) ~= "table" then
					condition_chars = { condition_chars }
				end

				local chars = {}
				for _, char in pairs(condition_chars) do
					if type(char) == "string" then
						if lang:getCode() == "und" then
							-- No real language: take the script from the
							-- first character seen for this code.
							sc = sc or m_scripts.getByCode(m_scripts.charToScript(mw.ustring.sub(char, 1, 1)))
						end

						mw.log(sc)

						table.insert(chars, tag_text(showCombiningChars(char), lang, sc))
					else
						mw.log("Table for " .. code .. " contains a condition in which the char variable is not a string.")
					end
				end

				if type(condition.code) == "string" then
					trackingLinks[index] = condition_path .. "/" .. condition.code
					tracked[index] = table.concat(chars, ", ")

					index = index + 1
				else
					mw.log("Table for " .. code .. " contains a condition in which the code variable is not a string.")
				end
			end
		end
	end

	local output = {}
	-- `tracked` is filled at consecutive indices from 1; ipairs keeps the
	-- entries in that order, which pairs does not guarantee.
	for i, condition in ipairs(tracked) do
		local text = {}
		local language = languages[i]
		if language then
			if i ~= 1 then
				table.insert(text, "\n")
			end
			table.insert(text, "* [[:Category:" .. language:getCategoryName() .. "|" .. language:getCanonicalName() .. "]]: ")
		end

		table.insert(text, condition)

		table.insert(text, " ([[Special:WhatLinksHere/Wiktionary:Tracking/script/" .. trackingLinks[i] .. "|" .. trackingLinks[i] .. "]])")

		table.insert(output, table.concat(text))
	end

	return table.concat(output, ", ")
end

-- Template entry point: lists the tracking conditions defined in
-- Module:IPA/tracking.
-- For each condition the output contains a link to the pages using the
-- tracking template, followed by the tracked symbols in IPA-classed spans.
-- The first condition of each language is preceded by a bullet linking to
-- the language's category.
-- Raises an error when the tracking data is malformed: conditions that are
-- not a table, an unknown language code, a `symb` that is neither a string
-- nor a table, a `cat` that is not a string, or a symbol that is not a
-- valid pattern.
function export.showIPATracking(frame)
	local canonical_names = mw.loadData("Module:languages/code to canonical name")

	local tracking_links = {}
	local tracked = {}
	-- Sparse: holds a language name at the index of that language's first
	-- entry in `tracking_links`.
	local languages = {}
	-- Index that the next tracking link will occupy.
	local lang_index = 1

	for lang, conditions in pairs(require("Module:IPA/tracking").tracking) do
		if type(lang) == "string" then
			assert(type(conditions) == "table", "The conditions for " .. lang .. " should be a table")

			languages[lang_index] = canonical_names[lang]
			assert(languages[lang_index] ~= nil, lang .. " is not a valid language code")

			for _, condition in ipairs(conditions) do
				lang_index = lang_index + 1

				local symbols = condition.symb
				local category = condition.cat
				assert(type(symbols) == "string" or type(symbols) == "table",
					"One of the tables contains a symb value that is not a string or table.")
				assert(type(category) == "string",
					"One of the tables contains a cat value that is not a string.")
				table.insert(tracking_links, lang .. "/" .. category)

				if type(symbols) ~= "table" then
					symbols = { symbols }
				end

				-- Collect the formatted symbols in a new list. Writing them
				-- back into `symbols` would replace the patterns stored in
				-- the tracking module's own data with HTML.
				local formatted = {}
				for _, symbol in ipairs(symbols) do
					-- Check that "symbol" is a valid pattern.
					assert(pcall(mw.ustring.find, "test", symbol))

					table.insert(formatted, '<span class="IPA">' .. symbol .. '</span>')
				end

				table.insert(tracked, table.concat(formatted, ", "))
			end
		end
	end

	local output = {}

	-- ipairs keeps the links in insertion order, which pairs does not
	-- guarantee.
	for i, tracking_link in ipairs(tracking_links) do
		local text = {}
		local language = languages[i]
		if language then
			if i ~= 1 then
				table.insert(text, "\n")
			end
			table.insert(text, "* [[:Category:" .. language .. " language|" .. language .. "]]: ")
		end

		table.insert(text, "[[Special:WhatLinksHere/Wiktionary:Tracking/IPA/" .. tracking_link .. "|" .. tracking_link .. "]] (" ..
			tracked[i] .. ")")

		table.insert(output, table.concat(text))
	end

	return table.concat(output, ", ")
end

-- Counts the characters of a UTF-8 string by counting lead bytes, each
-- taken together with its continuation bytes. Written as a faster
-- alternative to mw.ustring.len.
local function getlength(str)
	-- The second result of gsub is the number of matches, i.e. characters.
	return (select(2, string.gsub(str, '[%z\1-\127\194-\244][\128-\191]*', '')))
end

-- Template entry point: builds a boxed row of links to the numbered pages
-- Module:User:Erutuon/01, /02, ... and stops at the first number whose page
-- does not exist.
function export.navigation(frame)
	local cell =
		[=[<div style="display: inline-block; border: 1px solid lightgray; padding: 0.1em;">[[%s|%02d]]</div>]=]
	local wrapper = [=[
<div style="display: inline-block; border: 1px solid darkgray; padding: 0.5em; text-align: center;">
%s
</div>]=]

	local cells = {}
	local number = 1
	local module_name = ("Module:User:Erutuon/%02d"):format(number)

	while mw.title.new(module_name).exists do
		cells[#cells + 1] = cell:format(module_name, number)
		number = number + 1
		module_name = ("Module:User:Erutuon/%02d"):format(number)
	end

	return wrapper:format(table.concat(cells, "\n"))
end

-- Template entry point: returns links to the yearly talk page archives
-- "User talk:Erutuon/YEAR" that exist, from 2004 up to the current year,
-- separated by en dashes.
function export.talk_page_archive_links(frame)
	local links = require "Module:array"()

	-- "%Y" is the four-digit year; see
	-- http://man7.org/linux/man-pages/man3/strftime.3.html.
	local current_year = tonumber(os.date("%Y"))

	for year = 2004, current_year do
		local title = ("User talk:Erutuon/%d"):format(year)
		local page = mw.title.new(title)

		if page.exists then
			links:insert(("[[%s|%d]]"):format(title, year))
		end
	end

	return links:concat(" &ndash; ")
end

-- Replaces every "%XX" escape in `str` (two hexadecimal digits) with the
-- byte it encodes and returns the resulting string.
-- Raises a type error when `str` is not a string.
function export.percent_decode(str)
	local check_type = require "libraryUtil".checkType
	check_type("percent_decode", 1, str, "string")

	local function decode_byte(hex)
		return string.char(tonumber(hex, 16))
	end

	-- Assigning to a single local drops gsub's replacement count.
	local decoded = str:gsub("%%(%x%x)", decode_byte)
	return decoded
end

-- Template entry point for percent_decode: decodes the first argument of
-- the frame or, when the frame has none, of its parent frame.
function export.percent_decode_template(frame)
	local encoded = frame.args[1]
	if not encoded then
		encoded = frame:getParent().args[1]
	end

	return export.percent_decode(encoded)
end

-- Template entry point: returns a JSON array with the Unicode name of every
-- code point in frame.args[1].
-- Returns "[]" when the argument is empty or cannot be decoded.
function export.get_code_point_names(frame)
	local str = frame.args[1]

	-- results[1] is the pcall status. It used to be discarded with
	-- select(2, ...), so on failure the error message was treated as the
	-- first code point and handed on to lookup_name.
	local results = { pcall(mw.ustring.codepoint, str, 1, -1) }
	local ok = table.remove(results, 1)
	if not ok or results[1] == nil then
		return "[]"
	end

	-- After removing the status, `results` holds only the code points.
	return mw.text.jsonEncode(require 'Module:fun'.map(
		require 'Module:Unicode data'.lookup_name, results))
end

-- Parses template-style parameters out of `text`.
-- `text` is split on `parameter_separator` (default "|"). A piece of the
-- form "key=value" with a non-empty value becomes a named parameter, with
-- numeric keys converted to numbers; any other piece becomes the next
-- positional parameter. Returns the table of parameters.
local function gather_parameters(text, parameter_separator)
	local pieces = mw.text.split(text, parameter_separator or "|")
	-- Number of positional parameters stored so far.
	local position = 0

	local function store(parameters, piece)
		local name, content = piece:match("^(.-)=(.+)$")
		if name and content then
			parameters[tonumber(name) or name] = content
		else
			position = position + 1
			parameters[position] = piece
		end
		return parameters
	end

	return require "Module:fun".fold(store, pieces, {})
end

-- Template entry point: replaces every pseudo-template in frame.args[1]
-- with a family tree from Module:family tree.
-- A pseudo-template is the text between `template_start` (default "<<<")
-- and `template_end` (default ">>>"); its content is split into parameters
-- on `parameter_separator` (default ","). The delimiters are used as part
-- of a Lua pattern.
function export.multiple_trees(frame)
	local args = frame.args
	local text = args[1]
	local opening = args.template_start or "<<<"
	local closing = args.template_end or ">>>"
	local separator = args.parameter_separator or ","

	local function render_tree(fake_template)
		return require "Module:family tree".show {
			args = gather_parameters(fake_template, separator),
		}
	end

	-- Assigning to a single local drops gsub's replacement count.
	local expanded = text:gsub(opening .. "(.-)" .. closing, render_tree)

	return expanded
end

return export