-- Module:User:Suzukaze-c/punctuation
-- - This module sandbox lacks a documentation subpage. You may create it.
-- - Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
-- This is a private module sandbox of Suzukaze-c, for their own experimentation. Items in this module may be added and removed at Suzukaze-c's discretion; do not rely on this module's stability.
-- Table holding the data and functions this module makes available.
local export = {}
-- TODO: do numbers that contain punctuation (e.g. 1,000,000) need special handling?
-- Punctuation conversion table, grouped by language code.
-- Each inner key is a single character; its value is the replacement text,
-- usually wrapped in spacing markers:
--   ◇ : normal text would have a space at this position
--   ◆ : normal text would not have a space at this position
-- Example: "◆.◇" says "." never takes a leading space and always takes a
-- trailing one.
-- The "mul" group is consulted for every language as a fallback.
export.spacing_instructions = {
	mul = {
		-- NOTE(review): as written this key is a plain space, so every space
		-- in the input is converted to ";". Possibly a different character
		-- was intended here; confirm against the live module.
		[" "] = "◆;◇",
		["…"] = "◆…◆",
		["."] = "◆.◆",
		["。"] = "◆.◇",
		[","] = "◆,◇",
		["、"] = "◆,◇",
		["!"] = "◆!◇",
		["?"] = "◆?◇",
		[":"] = "◆:◇",
		[";"] = "◆;◇",
		["("] = "◇(◆",
		[")"] = "◆)◇",
		["|"] = "◆|◆",
		["—"] = "◆—◆",
	},
	zh = {
		["《"] = "◇“◆",
		["》"] = "◆”◇",
		["『"] = "◇“◆",
		["』"] = "◆”◇",
		["「"] = "◇‘◆",
		["」"] = "◆’◇",
		-- replaced by a bare space, without any spacing markers
		["·"] = " ",
	},
	ja = {
		-- no entries yet
		-- [[Module:ja/data]]
	},
}
-- Clean-up rules that turn the ◇/◆ markers into real spacing.
-- Each key is a pattern and each value is the text that replaces a match.
-- The rules are listed in the order they are meant to be applied; note that
-- a keyed Lua table does not itself record that order.
export.space_management_instructions = {
	-- adjacent opposite markers cancel out:
	-- 「◆!◇◆”◇」→「!”」
	["◇◆"] = "",
	-- 「◇‘◆◇(◆」→「‘(」
	["◆◇"] = "",
	-- a remaining ◆ swallows any spaces around it
	[" *◆ *"] = "",
	-- a remaining ◇, with any spaces around it, collapses to one space
	[" *◇ *"] = " ",
}
-- Return all the characters that can be converted for a language, as one
-- concatenated string.
--
-- @param lang  language code (e.g. "zh"); may be nil or an unknown code, in
--              which case only the shared "mul" characters are returned
-- @return      string made of every key of spacing_instructions[lang]
--              followed into the same set by every key of
--              spacing_instructions["mul"], without duplicates, sorted so
--              the result is the same on every call
--
-- NOTE(review): the characters are returned unescaped. Presumably callers
-- place the result inside a pattern character set; if a character such as
-- "%", "]", "^" or "-" is ever added to the tables it will need escaping.
function export.langRegexRange(lang)
	local instructions = export.spacing_instructions
	local seen = {}
	local chars = {}

	-- Add every not-yet-seen key of one language's table to `chars`.
	local function collect(code)
		local entries = instructions[code]
		if entries then
			for punctuation in pairs(entries) do
				if not seen[punctuation] then
					seen[punctuation] = true
					chars[#chars + 1] = punctuation
				end
			end
		end
	end

	-- Two explicit calls instead of ipairs({ lang, 'mul' }): with a nil
	-- `lang` that list starts with a hole, ipairs stops at once and the
	-- "mul" characters would be lost.
	collect(lang)
	collect('mul')

	-- pairs() order is unspecified; sort to get a stable result.
	table.sort(chars)
	return table.concat(chars)
end
-- Surround every convertible punctuation character with spaces.
-- The characters themselves are left as they are (no conversion).
--
-- @param text  string to process
-- @param lang  language code handed on to export.convChar
-- @return      the padded string
function export.space(text, lang)
	local pieces = {}
	-- '.' matches one character at a time
	for ch in mw.ustring.gmatch(text, '.') do
		local piece = ch
		if export.convChar(ch, lang) then
			piece = ' ' .. ch .. ' '
		end
		pieces[#pieces + 1] = piece
	end
	return table.concat(pieces)
end
-- Look up the marked-up form of a single punctuation character.
-- The table for `lang` is tried first, then the shared "mul" table.
--
-- @param char  the character to look up
-- @param lang  language code; may have no table of its own
-- @return      the marked-up replacement string, or false if there is none
function export.convChar(char, lang)
	local instructions = export.spacing_instructions

	local own = instructions[lang]
	if own then
		local marked = own[char]
		if marked then
			return marked
		end
	end

	return instructions['mul'][char] or false
end
-- Replace each convertible punctuation character in `text` with its
-- marked-up form; every other character is copied through unchanged.
--
-- @param text  string to process
-- @param lang  language code handed on to export.convChar
-- @return      the marked-up string
function export.conv(text, lang)
	local pieces = {}
	for ch in mw.ustring.gmatch(text, '.') do
		local marked = export.convChar(ch, lang)
		if marked then
			pieces[#pieces + 1] = marked
		else
			pieces[#pieces + 1] = ch
		end
	end
	return table.concat(pieces)
end
-- Convert marked-up text (containing ◇/◆ markers) to its final form.
--
-- Applies every rule of export.space_management_instructions with
-- mw.ustring.gsub, then trims the result with mw.text.trim.
--
-- @param text  marked-up string, e.g. the output of export.conv
-- @return      the string with all markers resolved and outer whitespace
--              trimmed
function export.main(text)
	local instructions = export.space_management_instructions

	-- The rules depend on their order: adjacent opposite markers must cancel
	-- out before a lone ◆ removes nearby spaces, and that must happen before
	-- a lone ◇ becomes a space (otherwise 「◆!◇◆”◇」 would not give 「!”」).
	-- pairs() makes no promise about traversal order, so spell it out here.
	local ordered = { "◇◆", "◆◇", " *◆ *", " *◇ *" }
	local applied = {}
	for _, pattern in ipairs(ordered) do
		local replacement = instructions[pattern]
		if replacement then
			text = mw.ustring.gsub(text, pattern, replacement)
		end
		applied[pattern] = true
	end

	-- Any rule added to the table that is not listed above still runs,
	-- afterwards and in sorted order so the result stays reproducible.
	local extra = {}
	for pattern in pairs(instructions) do
		if not applied[pattern] then
			extra[#extra + 1] = pattern
		end
	end
	table.sort(extra)
	for _, pattern in ipairs(extra) do
		text = mw.ustring.gsub(text, pattern, instructions[pattern])
	end

	return mw.text.trim(text)
end
return export