Open main menu

Wiktionary β

Module:zh-forms

Implements the new Chinese character box (the "hanzi box") used by the {{zh-forms}} template.


local export = {}
local m_zh = require("Module:zh")
local links = require("Module:links")
local lang = require("Module:languages")
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local len = mw.ustring.len

-- Builds the wikitext of the Chinese character box ("hanzi box") for {{zh-forms}}.
--
-- Arguments are read from the parent frame (i.e. the template invocation):
--   1 (list)  glosses, one per component of the word
--   s (list)  simplified form(s)
--   t (list)  traditional form(s); when "t" is not passed at all, the current
--             subpage name is used as the first traditional form
--   type      string of digits; each digit is the length of one component
--   delink    "y" to delink every component, or comma-separated component positions
--   alt       comma-separated alternative forms, each "form[-label][:transliteration]"
--   lit/note  literal meaning or free-form note shown in an extra row
--   gloss     "-" marks the term as phonetic (no per-component glosses)
--
-- @param frame  Scribunto frame object of the #invoke call.
-- @return       wikitext string: the table followed by any category/annotation text.
-- Raises an error when an "s"/"t" form has leading or trailing spaces, when
-- "type" is not made of digits or does not match the word length, when "delink"
-- contains a non-numeric position, or when a numbered heading on the page is
-- followed directly by a part-of-speech heading on the wrong level.
function export.make(frame)
	local params = {
		[1] = { list = true, allow_holes = true, allow_empty = true },
		["s"] = { list = true },
		["t"] = { list = true },
		["alt"] = {}, ["type"] = {}, ["delink"] = {}, ["lit"] = {}, ["note"] = {}, ["gloss"] = {}
	}
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, params)
	local current_title = mw.title.getCurrentTitle()
	local pagename = current_title.subpageText
	local zh_lang = lang.getByCode("zh")
	local comp_type = args["type"]
	local s, t = {}, {}
	local annotation = {}
	if not parent_args["t"] then
		table.insert(t, 1, pagename)
	end
	
	-- Appends a form to the given set, rejecting forms padded with spaces.
	local function insert_st(set, text)
		if match(text, "^ ") or match(text, " $") then
			error("Please remove the leading and / or trailing space(s) in the 's' and 't' parameters.")
		end
		table.insert(set, text)
	end
	
	for i = 1, #args.s do insert_st(s, args.s[i]) end
	for i = 1, #args.t do insert_st(t, args.t[i]) end
	
	-- "t" was passed but produced no usable form: fall back to the page name
	-- instead of failing later on a nil t[1].
	if #t == 0 then
		table.insert(t, pagename)
	end
	
	s.name = "simp"
	t.name = "trad"
	
	if #t ~= 1 and #s == 0 then
		table.insert(s, t[1])
	end
	
	-- Header cell introducing a row of forms ("trad.", "simp.", "alt. forms", ...).
	local function var_fmt(length, color)
		return '\n|-\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em;border: 1px solid #aaa;background: #' .. (color or 'E0FFFF') ..
			';font-weight: normal;font-size: smaller;" colspan="2" |'
	end
	
	-- Cell opener for every character cell except the last one in a row.
	local function char_gap(length)
		return '\n| style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em; background-color:white;border-bottom: 1px solid #aaa; font-size:x-large" lang="zh" class="Hani" | '
	end
	
	-- Cell opener for the last character cell in a row (adds the right border).
	local function char_last(length)
		return '\n| style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em; background-color:white;border-right: 1px solid #aaa;border' ..
			(length ~= 1 and '-bottom' or '') .. ': 1px solid #aaa; font-size:x-large" lang="zh" class="Hani" | '
	end
	
	-- Table opener; long words or many variants are not floated to the right.
	local function header(length, var_count)
		return ((length > 3 or var_count > 2) and ':{|' or '{| align=right') .. 
			' style="clear: right;margin: 1em;border-collapse: collapse;text-align: center"' ..
			(length ~= 1 and '\n|-\n! colspan=2|' or '')
	end
	
	-- Header cell for one gloss; "word" scales the cell width.
	local function gloss_fmt(word, colspan, length)
		return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 80%; width:' ..
			(length <= 8 and (30 * word + 30) or (25 * word + 25)) .. 'px" colspan=' .. (colspan or 1)  ..'|'
	end

	-- Parenthesised, linked list of whole-word forms; empty for single characters.
	-- Builds a fresh table so the caller's list of forms is left untouched.
	local function form_fmt(text, length)
		if length == 1 then
			return ''
		end
		local linked = {}
		for i, value in ipairs(text) do
			linked[i] = links.language_link({ lang = zh_lang, term = value })
		end
		return (length > 8 and '' or '<span style="font-size:140%">') ..
			'(<span lang="zh" class="Hani">' .. table.concat(linked, "/") .. '</span>)' ..
			(length > 8 and '' or '</span>')
	end
	
	-- Row of character cells built from the per-component strings in "text".
	local function char_fmt(text, length)
		return (#text ~= 1 and (char_gap(length) .. table.concat(text, char_gap(length), 1, #text-1)) or '') .. char_last(length) .. text[#text]
	end
	
	local test_word = t[1]
	local length = len(test_word)
	-- Each entry is { first, last }: the character range of one component.
	local word_division = {}
	local decomposable = false
	if comp_type then
		local i = 1
		for index in mw.text.gsplit(comp_type, "", true) do
			local size = tonumber(index)
			if not size then
				error("'type' parameter may only contain digits.")
			end
			-- Punctuation in the word gets a division of its own before the component.
			if match(mw.ustring.sub(test_word, i, i), '[,%-]') then
				table.insert(word_division, { i, i } )
				i = i + 1
			elseif mw.ustring.sub(test_word, i, i) == '…' then
				table.insert(word_division, { i, i + 1 } )
				i = i + 2
			end
			table.insert(word_division, { i, i + size - 1 } )
			i = i + size
		end
		if i - 1 ~= len((gsub(test_word, '…+$', ''))) and not match(table.concat(t) .. table.concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
			error("'type' parameter does not match word length.")
		end
	else
		for i = 1, length do
			table.insert(word_division, { i, i } )
		end
		decomposable = len((gsub(test_word, '…+$', ''))) > 2
	end
	
	local delink = {}
	if args["delink"] and args["delink"] ~= "" then
		if args["delink"] == "y" then
			for del_index, _ in ipairs(word_division) do
				delink[del_index] = "yes"
			end
		else
			for position in mw.text.gsplit(args["delink"], ",") do
				local del_index = tonumber(position)
				if del_index then
					delink[del_index] = "yes"
				elseif mw.text.trim(position) ~= "" then
					-- Previously this surfaced as an opaque "table index is nil" error.
					error("Invalid position \"" .. position .. "\" in the 'delink' parameter.")
				end
			end
		end
	end
	
	-- Shared opener for the full-width "alt. forms" and "anagram" rows. The
	-- trailing 44 characters of char_last() (font size, lang and class attributes)
	-- are cut off and replaced by a smaller-font style spanning all components.
	local function extra_row(label)
		return var_fmt(length, 'F0FFE0') .. label .. mw.ustring.sub(char_last(length), 1, -45)
			.. 'font-size:90%; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
			.. ' colspan="' .. #word_division .. '"|'
	end
	
	local char_set = { ['simp'] = {}, ['trad'] = {} }
	local identity = #s == 0 and {t} or {s,t}
	local uncreated = {}
	
	for _, id in ipairs(identity) do
		for i, position in ipairs(word_division) do
			-- Collect the distinct variants of this component across all forms.
			local char_string = ""
			for j = 1, #id do
				local word_form = mw.ustring.sub(id[j], position[1], position[2])
				-- Plain search: the form text must not be interpreted as a pattern.
				if not mw.ustring.find(char_string, word_form, 1, true) then
					char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
				end
			end
			if not match(char_string, '[,%-]') then
				local separator = delink[i] and "" or "/"
				local hash = {}
				for thing in mw.text.gsplit(char_string, separator) do
					table.insert(hash, links.language_link({ lang = zh_lang, term = thing }))
				end
				char_string = table.concat(hash, separator)
			end
			table.insert(char_set[id.name], char_string)
		end
		for _, item in ipairs(id) do
			if not (mw.title.new(item) or {}).exists and item ~= pagename then
				table.insert(uncreated, '"[[' .. item .. ']]"')
			end
		end
	end

	local scripts = { ['一-龯㐀-䶵'] = 'Hani' , ['a-zA-ZāēīōūĀĒĪŌŪa-zA-Z'] = 'Latn', ['0-90-9'] = 'Numb', ['Ͱ-Ͽ'] = 'Grek' }
	local script = {}
	for range, script_name in pairs(scripts) do
		if match(test_word, '[' .. range .. ']') then
			table.insert(script, script_name)
		end
	end

	-- Reduplication detection: the word contains an immediately repeated substring.
	if match(t[1], "([^─…]+)%1") and args['gloss'] ~= '-' and len(t[1]) < 7 then
		if gsub(comp_type or "", "1", "") == "" then
			table.insert(annotation, '[[Category:Chinese reduplications]]')
		elseif match(t[1], "([^…][^…]+)%1") or match(table.concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
			-- Set when a component has no usable Chinese entry of its own.
			local evil = false
			for _, component in ipairs(char_set['trad']) do
				if len(component) > 1 then
					-- mw.title.new returns nil for invalid titles; treat that as "no entry".
					local comp_title = mw.title.new(links.remove_links(component))
					local comp_content = comp_title and comp_title:getContent() or false
					if not comp_content or match(comp_content, "|gloss=-") or not match(comp_content, "==Chinese==") then
						evil = true
					end
				end
			end
			if not evil then table.insert(annotation, '[[Category:Chinese reduplications]]') end
		end
	end

	if #script > 1 then
		table.insert(annotation, '[[Category:Chinese terms written in multiple scripts]]')
	end
	if decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit'] then
		table.insert(annotation, '[[Category:Chinese entries with potentially decomposable titles]]')
	end
	if #uncreated > 0 and current_title.nsText == "" then
		table.insert(annotation, '[[Category:Chinese terms with uncreated forms]]' .. 
			'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') ..
			' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' .. 
			table.concat(uncreated, ", ") .. '.)</small>')
	end
	
	local altforms = ""
	-- Parenthesised so that the empty-string comparison applies to the argument
	-- (the unparenthesised form compared "" with "" and never tested the argument).
	if (args["alt"] or "") ~= "" then
		local altform_list = {}
		for altform in mw.text.gsplit(args["alt"], ",") do
			local altdecomp = mw.text.split(altform, ":")
			local altdecomp2 = mw.text.split(altdecomp[1], "-")
			table.insert(altform_list, '<span style="white-space:nowrap;">' ..
				m_zh.link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, pagename) .. 
				(altdecomp2[2] and ' <span style="font-size:70%"><i>' .. altdecomp2[2] .. '</i></span>' or '') .. '</span>')
		end
		if #altform_list > 5 then
			-- Show the first five forms and hide the full list behind a toggle.
			altforms = '<div class="vsSwitcher vsToggleCategory-glosses"><span class="vsToggleElement">&nbsp;</span>' ..
				'<div class="vsShow" style="display:none">' .. table.concat(altform_list, "<br>", 1, 5) ..
				'</div><div class="vsHide">' .. table.concat(altform_list, "<br>") .. '</div></div>'
		else
			altforms = table.concat(altform_list, "<br>")
		end
		altforms = extra_row('alt. forms') .. altforms
	end
	
	-- For two-character words, link the reversed word if it has a Chinese entry.
	local anagram = ""
	if len(t[1]) == 2 and not match(t[1], "(.)%1") then
		local anagram_link = mw.ustring.sub(t[1], 2, 2) .. mw.ustring.sub(t[1], 1, 1)
		local anagram_title = mw.title.new(anagram_link)
		local anagram_content = anagram_title and anagram_title:getContent() or false
		if anagram_content and match(anagram_content, "==Chinese==") then
			anagram = extra_row('anagram') .. m_zh.link(nil, nil, { anagram_link, tr = "-" }, pagename)
		end
	end
	
	-- Optional bottom row: the literal meaning takes precedence over the note.
	local literal = ""
	local lit, note = args["lit"], args["note"]
	local has_lit = (lit or "") ~= ""
	if has_lit or (note or "") ~= "" then
		local row_text
		if has_lit then
			row_text = '<i>Literally:</i> “' .. lit .. (match(lit, "%.$") and "”" or "”.")
		else
			row_text = note
		end
		literal = '\n|-' .. gloss_fmt(length, #word_division + 2, length) .. row_text
	end
	
	-- Strips or converts templates in a definition line taken from another
	-- entry so that it can be shown as a short gloss.
	local function replace_gloss(text)
		text = gsub(text, ' %(%{%{taxlink[^%}%)]+%}%}%)', '')
		text = gsub(text, '%{%{zh%-erhua form of%|([^%}]+)%}%}', '%1')
		text = gsub(text, '%{%{zh%-[^dm%|][^%|]+%|[^%|]+%|([^\n]+)%}%}', '%1')
		text = gsub(text, '%{%{vern', '{{w')
		text = gsub(text, '%{%{w%|([^%|%}]+)%|?([^%|%}]*)%}%}', function(w_link, w_display)
			return '[[w:'..w_link..'|'..(w_display~='' and w_display or w_link)..']]' end)
		text = gsub(text, '( ?)([%{%(]+[^%}%)]+[%}%)]+)', function(space, captured)
			local taxon = match(captured, "%{%{taxlink%|([^%|]+)[^%}]+%}%}")
			local wiki_link = 
				taxon and "''" .. taxon .. "''" or 
				match(captured, "%{%{vern%|(.+)%}%}") or false
			return wiki_link and space..wiki_link or "" end)
		return text
	end
	
	local gloss = {}
	if args['gloss'] == '-' then
		gloss = { gloss_fmt(length * 1.6, #word_division, length) .. '<i>phonetic</i>' }
	elseif length == 1 then
		gloss = {}--{ gloss_fmt(1.6, #word_division) .. '-' }
	else
		for i, position in ipairs(word_division) do
			local character = mw.ustring.sub(t[1], position[1], position[2])
			
			local gloss_text = args[1][i]
			-- Load glosses module if no gloss was supplied and the word is a single character.
			if not gloss_text and position[1] == position[2] then
				gloss_text = mw.loadData("Module:zh/data/glosses").glosses[character]
				if not gloss_text and match(character, "^[一-龯㐀-䶵]+$") then
					require('Module:debug').track('zh-forms/no gloss found for Chinese character')
				end
			end
			
			if not gloss_text then
				gloss_text = ""
			end
			
			--[[
				To ensure that suffixes are not broken up between lines, like this:
				-
				ist
			]]
			if mw.ustring.find(gloss_text, "-", nil, true) then
				local nonbreaking_hyphen = mw.ustring.char(0x2011)
				gloss_text = mw.ustring.gsub(gloss_text, "^%-", nonbreaking_hyphen)
				gloss_text = mw.ustring.gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)
			end
		
			-- No gloss for a multi-character component: try to derive one from
			-- the component's own entry (its "lit" value plus first definition).
			if gloss_text == "" and position[2] > position[1] then
				local entry_title = mw.title.new(character)
				local entry_content = entry_title and entry_title:getContent() or false
				if entry_content then
					local literally = match(entry_content, 'zh%-forms[^%}]*%|lit%=([^%{%|%}]+)[%|%}]')
					gloss_text = (literally and literally .. "; " or "") ..
						(match(mw.text.split(entry_content, '%-%-%-%-')[1] .. '\n', '\n#([^#\n]+)\n') or "")
						
					-- Applied twice, as in the original implementation (presumably
					-- to resolve templates nested one level deep).
					gloss_text = replace_gloss(gloss_text)
					gloss_text = replace_gloss(gloss_text)
					if gloss_text == "" and match(character, "^[一-龯㐀-䶵]+$") then require('Module:debug').track('zh-forms/no gloss found but entry exists') end
				else
					if gloss_text == "" and match(character, "^[一-龯㐀-䶵]+$") then require('Module:debug').track('zh-forms/no gloss found with a nonexistent entry') end
				end
			end
			-- More than two semicolons: collapse to the first three senses with a toggle.
			if len((gsub(gloss_text, '[^;]', ''))) > 2 then
				-- The pattern fails on a leading ";" or on ";;"; fall back to the
				-- whole gloss rather than concatenating nil.
				local short_gloss = match(gloss_text, '^[^;]+;[^;]+;[^;]+') or gloss_text
				gloss_text = '<div class="vsSwitcher vsToggleCategory-glosses"><span class="vsToggleElement">&nbsp;</span><div class="vsShow" style="display:none">' ..
					short_gloss .. '</div><div class="vsHide">' .. gloss_text .. '</div></div>'
			end
			local word_length = match(character, '[,…%-]') and 0 or 
				(delink[i] and len(character) or len(character) * (len((gsub(char_set['trad'][i], '[^/]', ''))) + 1))
			table.insert(gloss, gloss_fmt(word_length, 1, length) .. gloss_text)
		end
	end
		
	-- Sanity check of the page's heading structure.
	-- NOTE(review): the title is rebuilt from ".text", which drops the namespace,
	-- so outside the main namespace this reads a different page - confirm intent.
	local page_title = mw.title.new(current_title.text)
	local content = page_title and page_title:getContent()
	
	if content then
		local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1, 
			["Definitions"] = 1, ["Pronunciation"] = 1 }
		local previous_level = 2
		-- True when the previous heading ended in " <digit>" before its "=" run.
		local subheading_wanted = false
		for heading in mw.ustring.gmatch(content, "\n%=%=+[^%=]+%=%=+") do
			local _, current_level = gsub(heading, "%=", "")
			current_level = current_level / 2
			if subheading_wanted and current_level ~= previous_level + 1 then
				local heading_text = match(heading, "%=%=+([^%=]+)%=%=+")
				if applicable_pos[heading_text] then
					error("The heading \"===Etymology ''n''===\" or \"===Pronunciation ''n''===\" must be followed a subheading one level lower.")
				end
			end
			previous_level = current_level
			subheading_wanted = match(heading, " [0-9]%=%=+") and true or false
		end
	end
	
	-- Assemble: header, gloss row, form rows, optional extra rows, annotations.
	local forms
	if #identity == 1 then
		forms = var_fmt(length) .. '[[Simplified Chinese|simp.]] and [[Traditional Chinese|trad.]]<br>' .. 
			form_fmt(t, length) .. char_fmt(char_set['trad'], length)
	else
		forms = var_fmt(length) .. '[[Traditional Chinese|trad.]] ' .. 
			form_fmt(t, length) .. char_fmt(char_set['trad'], length) .. 
			var_fmt(length) .. '[[Simplified Chinese|simp.]] ' ..
			form_fmt(s, length) .. char_fmt(char_set['simp'], length)
	end
	
	return header(length, math.max(#t, #s)) .. table.concat(gloss, "") .. 
		forms .. altforms .. anagram .. literal .. '\n|}' .. table.concat(annotation)
end

return export