Open main menu

Wiktionary β

Module:User:Erutuon/sandbox

This module contains a lot of random functions created by User:Erutuon. Some have served a purpose, some have continuing use, and others were one-time experiments.

Testcases

  • no no
  • 2017 September 24
  • فَعَلَ (faʿala): ف، ـَ، ع، ـَ، ل، ـَ
pattern ـَ
a
ـَ
a
ـَ
a
root ف
f
ع
ʿ
ل
l
pattern تَ
ta
ـِي
ī
root ف
f
ع
ʿ
ل
l
pattern ـَ
a
ـَ
a
ـَ
a
root ف
f
ع
ʿ
ع
ʿ
ل
l
pattern ـَ
a
ـَا
ā
root ف
f
ع
ʿ
ع
ʿ
ل
l
pattern اِ
i
تَ
ta
ـَ
a
ـَ
a
root ف
f
ع
ʿ
ل
l
pattern يَسْتَ
yasta
ـِ
i
ـُ
u
root ف
f
ع
ʿ
ل
l
the rest
  • U(0x65, 0x78, 0x61, 0x6D, 0x70, 0x6C, 0x65)
  • γράμμα → grámma
    бу́ква → búkva
    حَرْف → ḥarf
  • (empty), a, bbc, (empty)
  • (empty), a, bbbbbc, (empty)
  • (empty), a, c, (empty)
  • Аа|Бб|Вв|Гг|Дд|Ее|Ёё|Жж|Зз|Іі|Йй|Кк|Лл|Мм|Нн|Оо|Пп|Рр|Сс|Тт|Уу|Ўў|Фф|Хх|Цц|Чч|Шш|Ыы|Ьь|Ээ|Юю|Яя
  • table, false, nil
ء ــَ ل ــِ ف
consonant vowel consonant vowel consonant

local export = {}

local m_IPA = mw.loadData("Module:IPA/data/symbols")
local tag_text = require("Module:script utilities").tag_text

local gsub = mw.ustring.gsub
local find = mw.ustring.find
local gmatch = mw.ustring.gmatch
local match = mw.ustring.match
local sub = mw.ustring.sub
local U = mw.ustring.char

-- Returns the single character found at position `index` of `term`,
-- using mw.ustring.sub so that positions count characters rather than bytes.
local function get(term, index)
	local char = sub(term, index, index)
	return char
end

-- Arabic diacritics, each built from its code point with mw.ustring.char.
local fatHa = U(0x64E)
local fatHatan = U(0x64B)
local kasratan = U(0x64D)
local Dammatan = U(0x64C)
local kasra = U(0x650)
local Damma = U(0x64F)
local sukuun = U(0x652)
local shadda = U(0x651)
-- The six vowel marks (plain and -tan forms) concatenated for use inside
-- character classes.
local vowel_diacritics = fatHa .. kasra .. Damma .. fatHatan .. kasratan .. Dammatan
-- Pattern character class matching any one vowel mark.
local vowel = "[" .. vowel_diacritics .. "]"
-- Pattern character class matching any one vowel mark, sukuun or shadda.
local diacritic = "[" .. vowel_diacritics .. sukuun .. shadda .. "]"
-- Individual Arabic letters that the functions below refer to by name.
local alif = "ا"
local waaw = "و"
local yaa = "ي"
local alif_maqSuura = "ى"
local madda = "آ"
local waSla = "ٱ"
local hamza = "ء"
local alif_hamza_above = "أ"
local alif_hamza_below = "إ"
local yaa_hamza = "ئ"
local waaw_hamza = "ؤ"
local taa_marbuuTa = "ة"
-- U+0640; prepended to vowel marks elsewhere in this module before they are
-- displayed.
local taTwiil = U(0x640)

-- Graphemes recognized by disintegrate(), grouped by kind. The group name
-- becomes the `type` recorded for each grapheme in the lookup table that is
-- built from this list.
local graphemes = {
	["consonant"] = {
		"ب", "ت", "ث", "ج", "ح", "خ", "د", "ذ", "ر", "ز", "س", "ش", "ص", "ض",
		"ط", "ظ", "ع", "غ", "ق", "ف", "ل", "ك", "م", "ن",
		-- "ه" was missing from the original list, so any term containing it
		-- made disintegrate() raise a "not recognized" error.
		"ه",
		"و", "ي", "ء",
	},
	-- Vowels are either a single vowel mark or a two-character sequence of a
	-- mark and a letter; disintegrate() tries the two-character form first.
	["vowel"] = {
		fatHa,
		fatHa .. alif,
		fatHa .. alif_maqSuura,
		Damma,
		Damma .. waaw,
		kasra,
		kasra .. yaa,
		fatHatan,
		fatHatan .. alif,
		alif .. fatHatan,
		Dammatan,
		kasratan,
	},
	["diacritic"] = {
		sukuun, shadda,
	},
	["sequence"] = {
		madda,
	},
	["other"] = {
		waSla,
	},
}

-- Lookup table from grapheme to a record holding its kind, e.g.
-- data[fatHa].type == "vowel". Declared local so that loading the module
-- does not create a global named `data`.
local data = {}

for kind, list in pairs(graphemes) do
	-- Each list is a plain sequence, so ipairs is sufficient.
	for _, grapheme in ipairs(list) do
		data[grapheme] = { type = kind }
	end
end

-- Looks up the kind ("consonant", "vowel", ...) recorded for `char` in the
-- grapheme table; returns nil when the grapheme is unknown or has no type.
local function getType(char)
	local entry = data[char]
	return entry and entry.type or nil
end

-- Normalizes hamza spellings in `term`: every hamza written on a seat letter
-- is replaced by the bare hamza, and madda is expanded to
-- hamza + fatHa + alif.
local function convertHamza(term)
	local seatedHamza = "[" .. alif_hamza_above .. alif_hamza_below .. yaa_hamza .. waaw_hamza .. "]"
	local withBareHamza = gsub(term, seatedHamza, hamza)
	local withoutMadda = gsub(withBareHamza, madda, hamza .. fatHa .. alif)
	return withoutMadda
end

-- Appends `chars` to `list` and strips that many characters from the front
-- of `term`.
-- Returns the list and the shortened term.
local function add(list, chars, term)
	table.insert(list, chars)
	-- `length` was previously assigned without `local`, leaking a global.
	local length = mw.ustring.len(chars)
	term = sub(term, length + 1)
	return list, term
end

-- Splits an Arabic term into a list of graphemes.
-- Each list entry is a table { grapheme, type = kind }. A consonant followed
-- by shadda is emitted twice, vowels are prefixed with taTwiil, and sukuun is
-- dropped. Raises an error for any character that is not recognized.
local function disintegrate(term)
	local pieces = {}
	local rest = convertHamza(term)

	while mw.ustring.len(rest) > 0 do
		local first = get(rest, 1) or ""
		local second = get(rest, 2) or ""
		local pair = sub(rest, 1, 2) or ""

		if mw.ustring.len(first) > 1 then
			error(first .. " isn't a single character.")
		end

		-- Number of characters of `rest` used up by this step.
		local consumed
		local firstType = getType(first)

		if firstType == "consonant" then
			table.insert(pieces, { first, type = firstType })
			if second == shadda then
				-- A geminated consonant is listed a second time.
				table.insert(pieces, { first, type = firstType })
				consumed = 2
			else
				consumed = 1
			end
		elseif getType(pair) == "vowel" then
			-- Two-character vowels take precedence over single vowel marks.
			table.insert(pieces, { taTwiil .. pair, type = "vowel" })
			consumed = 2
		elseif firstType == "vowel" then
			table.insert(pieces, { taTwiil .. first, type = "vowel" })
			consumed = 1
		elseif first == sukuun then
			consumed = 1
		elseif first == madda then
			error("Maddas should have been replaced by convertHamza.")
		else
			error("The character " .. first .. " was not recognized by the function disintegrate.")
		end

		rest = sub(rest, consumed + 1)
	end

	return pieces
end

-- Renders the disintegration of frame.args[1] as a two-row wikitable:
-- the first row holds the script-tagged graphemes, the second their kinds.
function export.printDisintegration(frame)
	-- Wraps text in Arabic language/script tagging.
	local function tag(text)
		local lang = require("Module:languages").getByCode("ar")
		local sc = require("Module:scripts").getByCode("Arab")

		return tag_text(text, lang, sc)
	end

	local graphemeCells, kindCells = {}, { "\n|-" }
	local pieces = disintegrate(frame.args[1])

	for _, piece in ipairs(pieces) do
		local grapheme = piece and piece[1]
		if grapheme then
			if piece.type == "vowel" then
				grapheme = taTwiil .. grapheme
			end

			table.insert(graphemeCells, "\n| ")
			table.insert(graphemeCells, tag(grapheme))

			table.insert(kindCells, "\n| ")
			table.insert(kindCells, piece.type)
		end
	end

	return '{| class="wikitable"' .. table.concat(graphemeCells) .. table.concat(kindCells) .. "\n|}"
end

-- Experiment showing what tostring() yields for a table, for false and for
-- nil. The `value` parameter is accepted but not used.
-- Returns the three string forms joined by ", ".
function export.tostring(value)
	-- `output` was previously assigned without `local`, leaking a global.
	local output = {}
	table.insert(output, { 1, 2, 3 } )
	table.insert(output, false)

	-- output[3] is never set, so the last item is tostring(nil).
	return tostring(output[1]) .. ", " .. tostring(output[2]) .. ", " .. tostring(output[3])
end

-- Returns a pattern character class made of two code point ranges:
-- U+0590-U+05FF and U+FB1D-U+FB4F.
function export.Hebrew(frame)
	local firstRange = U(0x0590) .. "-" .. U(0x05FF)
	local secondRange = U(0xFB1D) .. "-" .. U(0xFB4F)
	return "[" .. firstRange .. secondRange .. "]"
end

-- Prints Lua code that reconstructs a string from its code points.
-- Arguments (read from the invocation frame, then from the parent frame):
--   1       the text to convert; defaults to "example"
--   func    name of the function to print; defaults to "mw.ustring.char",
--           and is always "U" when the current page is in the Module namespace
--   format  "concat" prints one call per code point joined by " .. ";
--           anything else prints a single call with all code points
-- Returns the expansion of {{code|lua|...}} with the generated code.
function export.printCodepoints(frame)
	-- The parent frame is only consulted when the invocation frame lacks
	-- the argument, as before.
	local function getArg(name)
		local value = frame.args[name]
		if value == nil then
			value = frame:getParent().args[name]
		end
		return value
	end

	local term = getArg(1)
	local functionName = getArg("func") or "mw.ustring.char"
	local format = getArg("format")

	if mw.title.getCurrentTitle().nsText == "Module" then
		functionName = "U"
	end

	local concatenated = format == "concat"

	-- Previously a leaked global iterated with pairs(); the order of the
	-- code points matters, so ipairs is used on a local list.
	local codepoints = { mw.ustring.codepoint(term or "example", 1, -1) }
	local literals = {}
	for _, codepoint in ipairs(codepoints) do
		local literal = string.format("0x%X", codepoint)
		if concatenated then
			literal = functionName .. "(" .. literal .. ")"
		end
		table.insert(literals, literal)
	end

	local code
	if concatenated then
		code = table.concat(literals, " .. ")
	else
		code = functionName .. "(" .. table.concat(literals, ", ") .. ")"
	end

	return frame:expandTemplate{ title = "code", args = { "lua", code } }
end

-- For every uppercase letter in frame.args[1], emits that letter followed by
-- its lowercase form. The pairs are joined with frame.args.separator
-- (", " when absent).
function export.addLower(frame)
	local characters = frame.args[1]
	local separator = frame.args["separator"] or ", "

	local letterPairs = {}
	for upper in mw.ustring.gmatch(characters, "%u") do
		table.insert(letterPairs, upper .. mw.ustring.lower(upper))
	end

	return table.concat(letterPairs, separator)
end

-- Matches frame.args[1] against a fixed four-capture pattern and returns the
-- captures joined by ", ". Empty captures are shown as (''empty'').
function export.match(frame)
	local captures = { mw.ustring.match(frame.args[1], "(%)?)(%l?)(%l+)(%l?)") }

	for position, capture in pairs(captures) do
		if capture == "" then
			captures[position] = "(''empty'')"
		else
			captures[position] = tostring(capture)
		end
	end

	return table.concat(captures, ", ")
end

-- Fetches list[item]. When `list` is a table without a truthy entry for
-- `item`, an error span naming the item is returned instead. When `list` is
-- not a table, nothing is returned.
local function addContent(item, list)
	if type(list) ~= "table" then
		return
	end

	local content = list[item]
	if content then
		return content
	end

	return '<span class="error">no content for "' .. tostring(item) .. '"</span>'
end

-- Takes a list of "langcode:term" arguments and, for each one, fills the
-- template "<<term>> &rarr; <<translit>>" with the tagged term and its
-- transliteration. The filled lines are joined with <br>.
function export.replacement(frame)
	local args = require("Module:parameters").process(frame.args, { [1] = { list = true } })
	local template = "<<term>> &rarr; <<translit>>"
	local lines = {}

	for _, item in ipairs(args[1]) do
		local code, term = mw.ustring.match(item, "^([^:]+):(.+)$")
		local lang = require("Module:languages").getByCode(code)
		local translit = lang:transliterate(term)
		local tagged = tag_text(term, lang)

		local content = { term = tagged, translit = translit, }
		-- Each <<name>> placeholder is looked up in `content`.
		local filled = gsub(template, "<<([^>]+)>>", function(name)
			return addContent(name, content)
		end)

		table.insert(lines, filled)
	end

	return table.concat(lines, "<br>")
end

-- Returns nil when `text` is the empty string or a lone sukuun; otherwise
-- returns `text` unchanged.
local function ine(text)
	if text == "" or text == sukuun then
		return nil
	end
	return text
end

-- Breaks an Arabic pattern word (frame.args[1], default "فَعَلَ") into the
-- root consonants ف ع ل ق and the material between them.
-- Returns a link to the pattern, the list of elements, and a wikitable with
-- one row for pattern material and one row for root consonants, each with
-- its transliteration.
function export.showPattern(frame)
	local lang = require("Module:languages").getByCode("ar")
	local sc = require("Module:scripts").getByCode("Arab")

	local function link(term)
		return require("Module:links").full_link{ term = term, lang = lang, sc = sc }
	end

	-- Script-tags `term`; returns nothing when `term` is nil.
	local function tag(term)
		if term then
			return require("Module:script utilities").tag_text(term, lang, sc)
		end
	end

	-- Transliterates `term` and removes a leading hyphen from the result.
	local function transliterate(term)
		-- `translit` was previously assigned without `local`, leaking a global.
		local translit = lang:transliterate(term)
		if translit then
			translit = gsub(translit, "^%-", "")
		end
		return translit
	end

	local pattern = frame.args[1] or "فَعَلَ"
	local basic_root = {
		"ف",
		"ع",
		"ل",
		"ق",
	}

	local elements = {}
	local patternTable = {}
	local rootTable = {}
	-- Move shadda in front of a preceding vowel mark so that it directly
	-- follows its consonant.
	local workingStr = gsub(pattern, "(" .. vowel .. ")" .. shadda, shadda .. "%1")

	-- Adds one column to the table: a root cell and a pattern cell, one of
	-- which is empty. Nothing is added when both arguments lack content.
	local function insert(root, patternPart)
		if ( root and root[1] ) or ( patternPart and patternPart[1] ) then
			if root and root[1] then
				table.insert(rootTable, tag(root[1]) .. ( root.tr and "<br>" .. root.tr or "") )
			else
				table.insert(rootTable, "")
			end
			if patternPart and patternPart[1] then
				table.insert(patternTable, tag(patternPart[1]) .. ( patternPart.tr and "<br>" .. patternPart.tr or "" ) )
			else
				table.insert(patternTable, "")
			end
		end
	end

	-- The root letters must be consumed in order, so ipairs is used.
	for _, letter in ipairs(basic_root) do
		local _, index, before, consonant = find(
			workingStr,
			"^([^" .. letter .. "]*)(" .. letter .. shadda .. "?)"
			)

		if index then
			before = ine(before)
			consonant = ine(consonant)

			if before and find(before, "^" .. diacritic) then
				before = taTwiil .. before
			end

			table.insert(elements, before)
			table.insert(elements, consonant)

			local hasShadda
			if consonant then
				consonant, hasShadda = gsub(consonant, shadda, "")
				if hasShadda < 1 then
					hasShadda = false
				end
			end

			if before then
				before = gsub(before, taTwiil .. sukuun, "")
			end

			before = { before, tr = transliterate(before) }
			consonant = { consonant, tr = transliterate(consonant) }

			insert(nil, before )
			insert(consonant, nil)
			-- A geminated consonant gets a second root column.
			if hasShadda then
				insert(consonant, nil)
			end

			workingStr = sub(workingStr, index + 1)
		end
	end

	-- Whatever follows the last root consonant is pattern material.
	if workingStr and workingStr ~= "" then
		if find(workingStr, "^" .. diacritic) then
			workingStr = taTwiil .. workingStr
		end
		local tr = transliterate(workingStr)
		workingStr = tag(workingStr)
		table.insert(elements, workingStr)

		workingStr = { workingStr, tr = tr }

		insert(nil, workingStr)
	end

	-- These three were previously leaked as globals.
	local elementsList = table.concat(elements, "، ")
	local patternRow = "\n! pattern \n| " .. table.concat(patternTable, "\n| ")
	local rootRow = "\n! root \n| " .. table.concat(rootTable, "\n| ")

	return link(pattern) .. ": " .. elementsList .. '<br>' .. '\n{| class="wikitable"' .. patternRow .. '\n|-' .. rootRow .. '\n|}'
end

-- Formats the current date in the wiki's content language with the format
-- code "'''Y''' F j".
function export.date(frame)
	local contentLanguage = mw.language.getContentLanguage()
	return contentLanguage:formatDate("'''Y''' F j")
end

-- Prints a from/to replacement list as Lua-style `["from"] = "to"` lines
-- inside a <syntaxhighlight> tag, padding each key with tabs according to
-- its length.
function export.printEntryNameReplacements(frame)
	-- `entry_name` was previously assigned without `local`, leaking a global.
	-- NOTE(review): GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON and TILDE are not
	-- defined anywhere in the visible module, so they evaluate to nil and add
	-- no entries. Presumably they were meant to be combining diacritics;
	-- confirm and define them if those rows should be printed.
	local entry_name = {
		from = {"[ȀÀȂÁĀÃ]", "[ȁàȃáāã]", "[ȄÈȆÉĒẼ]", "[ȅèȇéēẽ]", "[ȈÌȊÍĪĨ]", "[ȉìȋíīĩ]", "[ȌÒȎÓŌÕ]", "[ȍòȏóōõ]", "[ȐȒŔ]", "[ȑȓŕ]", "[ȔÙȖÚŪŨ]", "[ȕùȗúūũ]", "Ѐ", "ѐ", "[ӢЍ]", "[ӣѝ]", "[Ӯ]", "[ӯ]", GRAVE, ACUTE, DGRAVE, INVBREVE, MACRON, TILDE},
		to   = {"A", "a", "E", "e", "I", "i", "O", "o", "R", "r", "U", "u", "Е", "е", "И", "и", "У", "у"},
	}

	local output = {}

	-- The rows must be printed in list order, so ipairs is used.
	for i, regex in ipairs(entry_name.from) do
		local length = mw.ustring.len(regex)
		local padding = length < 4 and '\t\t\t' or length < 6 and '\t\t' or '\t'
		table.insert(output, '["' .. regex .. '"]' .. padding .. '= "' .. ( entry_name.to[i] or "" ) .. '"')
	end

	return frame:extensionTag{ name = "syntaxhighlight", content = table.concat(output, ",\n"), args = { lang = "html" } }
end

-- Reports whether frame.args[1] contains template syntax before and after
-- being run through frame:preprocess.
-- Returns two "yes"/"no" answers separated by two tabs.
function export.testPreprocess(frame)
	local function containsTemplate(text)
		-- The original pattern was "{{[^}]'+}}"; the stray apostrophe meant
		-- that ordinary "{{name}}" text was never detected.
		if mw.ustring.match(text, "{{[^}]+}}") then
			return "yes"
		else
			return "no"
		end
	end

	return containsTemplate(frame.args[1]) .. "\t\t" .. containsTemplate(frame:preprocess(frame.args[1]))
end

-- Prints every character from one code point to another, wrapping runs of
-- characters in <span class="SCRIPT"> elements, where SCRIPT is the script
-- code reported by Module:Unicode data.
-- Arguments: 1 = first character (default "ぁ"), 2 = last character
-- (default "ー"). The two are swapped if given in descending order.
function export.printRange(frame)
	-- The original wrote `"ぁ" or frame.args[1]`, which always yields the
	-- literal and ignores the arguments. A missing frame or an empty
	-- argument falls back to the defaults.
	local args = frame and frame.args or {}
	local start = args[1]
	local finish = args[2]
	if start == nil or start == "" then
		start = "ぁ"
	end
	if finish == nil or finish == "" then
		finish = "ー"
	end

	if type(start) == "string" then
		start = mw.ustring.codepoint(start)
	end

	if type(finish) == "string" then
		finish = mw.ustring.codepoint(finish)
	end

	if start > finish then
		start, finish = finish, start
	end

	local out = {}
	-- After the swap above start <= finish, so the loop always counts upward.
	local scriptPattern, script
	for i = start, finish do
		-- Once a script is known, build a character class from its characters.
		if not scriptPattern and script then
			local scriptObject = require("Module:scripts").getByCode(script)
			if scriptObject and scriptObject:getCharacters() then
				scriptPattern = "[" .. scriptObject:getCharacters() .. "]"
			end
		end

		local character = mw.ustring.char(i)

		-- Close the current span when the character falls outside the pattern.
		if scriptPattern and not mw.ustring.match(character, scriptPattern) then
			table.insert(out, '</span>')
		end
		-- Open a span when there is no pattern yet or the character falls
		-- outside it.
		if not scriptPattern or scriptPattern and not mw.ustring.match(character, scriptPattern)  then
			script = require("Module:Unicode data").get_script(i)
			table.insert(out, '<span class="' .. script .. '">')
		end

		table.insert(out, character)

		if i == finish then
			table.insert(out, '</span>')
		end
	end

	out = table.concat(out)
	return out
end

-- Shorthand for mw.ustring.char; this repeats the alias declared near the
-- top of the module.
local U = mw.ustring.char
-- U+25CC, which addDottedCircle prepends to combining characters.
local dottedCircle = U(0x25CC)

-- Given a single-character string, returns it with a dotted circle in front
-- when Module:Unicode data reports the character as combining, and unchanged
-- otherwise. Returns nil for non-strings and for strings whose length is
-- not exactly one character.
local function addDottedCircle(char)
	if type(char) ~= "string" or mw.ustring.len(char) ~= 1 then
		return nil
	end

	local codepoint = mw.ustring.codepoint(char)
	if require("Module:Unicode data").is_combining(codepoint) then
		return dottedCircle .. char
	end

	return char
end

-- Runs every character of `text` through addDottedCircle and returns the
-- concatenated result. Returns nothing when `text` is not a string.
local function showCombiningChars(text)
	if type(text) ~= "string" then
		return
	end

	local pieces = {}
	for char in mw.ustring.gmatch(text, ".") do
		pieces[#pieces + 1] = addDottedCircle(char)
	end

	return table.concat(pieces)
end

-- Lists the script tracking conditions defined in
-- Module:script utilities/tracking. For every condition that has a string
-- `code` and no `redirect_to`, prints the tracked characters and a
-- WhatLinksHere link for its tracking template. The first condition of each
-- language is preceded by a bulleted link to the language's category.
function export.showConditions(frame)
	local languages, trackingLinks, tracked = {}, {}, {}
	local index = 1

	for code, conditions in pairs(require("Module:script utilities/tracking").allTrackingConditions) do
		-- Codes that are not language codes fall back to "und".
		local lang = require("Module:languages").getByCode(code) or require("Module:languages").getByCode("und")
		local sc
		if lang then
			-- Stored at the index of this language's first condition.
			languages[index] = lang
		end

		local path = conditions.path or code

		for _, condition in ipairs(conditions) do
			if not condition.redirect_to then
				local conditionPath = condition.path or path
				local chars = {}

				-- Normalize to a list locally; the original wrote the list
				-- back into the shared tracking data.
				local conditionChars = condition.chars
				if type(conditionChars) ~= "table" then
					conditionChars = { conditionChars }
				end

				for _, char in pairs(conditionChars) do
					if type(char) == "string" then
						if lang:getCode() == "und" then
							-- The script is detected once per code, from the
							-- first character seen.
							sc = sc or require("Module:scripts").getByCode(require("Module:Unicode data").get_script(mw.ustring.sub(char, 1, 1)))
						end

						mw.log(sc)

						char = require("Module:script utilities").tag_text(showCombiningChars(char), lang, sc)

						table.insert(chars, char)
					else
						mw.log("Table for " .. code .. " contains a condition in which the char variable is not a string.")
					end
				end

				if type(condition.code) == "string" then
					trackingLinks[index] = conditionPath .. "/" .. condition.code
					tracked[index] = table.concat(chars, ", ")

					index = index + 1
				else
					mw.log("Table for " .. code .. " contains a condition in which the code variable is not a string.")
				end
			end
		end
	end

	local output = {}
	-- `tracked` is filled at consecutive indices, so ipairs visits every
	-- entry in order.
	for i, condition in ipairs(tracked) do
		local text = {}
		if languages[i] then
			if i ~= 1 then
				table.insert(text, "\n")
			end
			table.insert(text, "* [[:Category:" .. languages[i]:getCategoryName() .. "|" .. languages[i]:getCanonicalName() .. "]]: ")
		end

		table.insert(text, condition)

		table.insert(text, " ([[Special:WhatLinksHere/Template:tracking/script/" .. trackingLinks[i] .. "|" .. trackingLinks[i] .. "]])")

		table.insert(output, table.concat(text))
	end

	output = table.concat(output, ", ")

	return output
end

-- Lists the IPA tracking conditions defined in Module:IPA/tracking. For
-- every condition under a string language code, prints a WhatLinksHere link
-- for its tracking template followed by the tracked symbols. The first
-- condition of each language is preceded by a bulleted link to the
-- language's category.
function export.showIPATracking(frame)
	local tracking_links = {}
	local tracked = {}
	local languages = {}
	local lang_index = 1
	-- Loaded on first use only.
	local canonical_names

	for lang, conditions in pairs(require("Module:IPA/tracking").tracking) do
		if type(lang) == "string" then
			canonical_names = canonical_names or mw.loadData("Module:languages/code to canonical name")
			-- Stored at the index of this language's first condition.
			languages[lang_index] = canonical_names[lang]
			for _, condition in ipairs(conditions) do
				lang_index = lang_index + 1

				table.insert(tracking_links, lang .. "/" .. condition.cat)

				local symbols = condition.symb

				if type(symbols) ~= "table" then
					mw.log("symbols not table: " .. tostring(symbols) )
					symbols = { symbols }
				end

				-- Build a new list; the original overwrote the entries of
				-- condition.symb in the shared tracking data.
				local wrapped = {}
				for _, symbol in ipairs(symbols) do
					table.insert(wrapped, '<span class="IPA">' .. symbol .. '</span>')
				end

				table.insert(tracked, table.concat(wrapped, ", ") )
			end
		end
	end

	local output = {}

	-- tracking_links is a sequence, so ipairs visits it in order.
	for i, tracking_link in ipairs(tracking_links) do
		local text = {}
		if languages[i] then
			if i ~= 1 then
				table.insert(text, "\n")
			end
			table.insert(text, "* [[:Category:" .. languages[i] .. " language|" .. languages[i] .. "]]: ")
		end

		table.insert(text, "[[Special:WhatLinksHere/Template:tracking/IPA/" .. tracking_link .. "|" .. tracking_link .. "]] (" ..
			tracked[i] .. ")")

		table.insert(output, table.concat(text))
	end

	return table.concat(output, ", ")
end

return export