import re
import hashlib
from pywikibot import Site, Page
# Configuration for the flag CSS generator.
# The pages to read from / write to: the flag list is read from
# get_page_title and the generated CSS is saved to set_page_title.
get_page_title = "Wiktionary:Language flags list"
set_page_title = "MediaWiki:Gadget-WiktCountryFlags.css"
# Because this modifies global CSS, interface admin rights are required.
# The L2 order. The following languages are always placed first, in this
# order; every other language follows sorted by name.
always_first_l2 = ["Translingual", "English"]
# The CSS header. The placeholders {get_page_title} and
# {get_page_revision_id} are filled in with str.format before saving.
css_header_template = """/*
* flags.css - adds flags to language headers.
* originally created by [[User:Prince Kassad]]
* this version is auto-generated from the list at [[{get_page_title}]]
* ([[Special:Permalink/{get_page_revision_id}]])
*/
"""
# The CSS rules to create for every language. selector_template is formatted
# once per language; rule_template receives the comma-joined selectors and the
# already-quoted image URL. Doubled braces are literal braces for str.format.
selector_template = ".ns-0 h2 #{language}::before, .ns-0 h2#{language}::before"
rule_template = """{selectors}{{padding-right:5px;
content:url({url});}}"""
# Regex to iterate over the language names and filenames
# in the CSS code produced by this script.
# Group 1 captures the language ID from an "h2 #...::before" selector;
# group 2 captures the file name that follows the hashed thumbnail directory
# in a content:url(...) declaration. Only one of the two groups is set in any
# single match.
css_language_filename_regex = (r"""h2 \#([^:]+)::before|\{[^;]+;\s*content:url\(['"]"""
+ re.escape("//upload.wikimedia.org/wikipedia/commons/thumb/")
+ r"[0-9a-f]/[0-9a-f]{2}/([^/]+)")
# Other settings.
# default_size is the thumbnail width used when a list line gives no size.
default_size = "45px"
# Base edit summary; a description of the detected changes is appended to it.
edit_summary = "(bot) update CSS based on flag list from [[" + get_page_title + "]]"
# When True, the generated CSS and summary are printed instead of being saved.
debug_mode = False
# Format for the input list.
# list_regex captures the text between the start/end marker comments;
# line_regex matches one "language: filename" entry with an optional
# trailing size such as "30px".
list_regex = r"<!-- start list -->(.+?)<!-- end list -->"
line_regex = r"(.+?): (.+?)(?: (\d+px))?$"
# Code follows
# Maps file name -> thumbnail size; rebuilt from the parsed flag list once the
# list page has been read.
size_dict = {} # to be defined later
def convert_line_to_item(match):
    """Convert a ``line_regex`` match into a ``(language, filename, size)`` tuple.

    Underscores in the file name are turned into spaces, and a missing size
    falls back to ``default_size``. Returns ``None`` when ``match`` is falsy
    (i.e. the line did not match).
    """
    if match:
        language, raw_filename, explicit_size = match.group(1, 2, 3)
        filename = raw_filename.replace("_", " ")
        return (language, filename, explicit_size or default_size)
    return None
def get_url_from_file(file):
    """Build the protocol-relative thumbnail URL for a flag file.

    The width is looked up in ``size_dict`` (falling back to
    ``default_size``), spaces in the file name become underscores, and
    ``.svg`` files get ``.png`` appended to the thumbnail name.
    """
    width = size_dict.get(file, default_size)
    stored_name = file.replace(" ", "_")
    # SVG files do not have thumbnails of their own; request a PNG rendering.
    png_suffix = ".png" if stored_name.endswith(".svg") else ""
    # Right now the thumbnail path contains the first one and the first two
    # hex digits of the MD5 hash of the file name; this may change one day.
    digest = hashlib.md5(stored_name.encode('utf-8')).hexdigest().lower()
    # Protocol-relative URL.
    url_template = ("//upload.wikimedia.org/wikipedia/commons/thumb/"
                    "{l1}/{l2}/{file}/{size}-{thumbfile}")
    return url_template.format(
        l1=digest[:1],
        l2=digest[:2],
        file=stored_name,
        size=width,
        thumbfile=stored_name + png_suffix,
    )
def quote_url_for_css(url):
    """Wrap ``url`` in quotes suitable for a CSS ``url(...)`` value.

    Single quotes are used unless the URL itself contains a single quote, in
    which case double quotes are used. Double quotes inside the URL are always
    replaced with ``%22``.
    """
    escaped_url = url.replace('"', '%22')
    if "'" in url:
        delimiter = '"'
    else:
        delimiter = "'"
    return delimiter + escaped_url + delimiter
def make_css_rule(l2s, file):
    """Return one CSS rule that shows ``file`` before every header in ``l2s``.

    Each language name is turned into a selector via ``selector_template``
    (spaces become underscores, single quotes are backslash-escaped), and the
    selectors share a single declaration block pointing at the file's URL.
    """
    selector_parts = []
    for l2 in l2s:
        css_id = l2.replace(" ", "_").replace("'", r"\'")
        selector_parts.append(selector_template.format(language=css_id))
    joined_selectors = ', '.join(selector_parts)
    quoted_url = quote_url_for_css(get_url_from_file(file))
    return rule_template.format(selectors=joined_selectors, url=quoted_url)
def deduplicate(array):
    """Return the elements of ``array`` as a list with duplicates dropped.

    The first occurrence of each element is kept and the original order is
    preserved. ``array`` may be any iterable of hashable items.
    """
    already_seen = set()
    unique_items = []
    for item in array:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique_items.append(item)
    return unique_items
def reverse_lookup(dict, value):
    """Return a list of every key in ``dict`` whose value equals ``value``.

    Keys are returned in the dictionary's iteration order; the list is empty
    when no key maps to ``value``.
    """
    def maps_to_value(key):
        return dict[key] == value

    return list(filter(maps_to_value, dict.keys()))
def search_matching(array, predicate, start=0, stop=2147483647):
    """
    Return the index of the first item in ``array[start:stop]`` for which
    ``predicate(index, item)`` is truthy, or -1 if there is no such item.
    Indices passed to the predicate and returned are counted from ``start``.
    """
    candidates = enumerate(array[start:stop], start=start)
    for index, value in candidates:
        if predicate(index, value):
            return index
    return -1
def convert_flag_dict_to_css(flags):
    """Convert a ``{language: filename}`` dictionary into CSS rules.

    Languages are sorted by name, with the entries of ``always_first_l2``
    moved to the front in their configured order. One rule is emitted per
    distinct file, in the order the file is first encountered in that sorted
    list, and each rule covers every language that uses the file. Rules are
    separated by a blank line.
    """
    # (language, file) pairs sorted by language name.
    sorted_flags = sorted(flags.items(), key=lambda item: item[0])
    # Bring the configured L2s to the front. Iterating in reverse means the
    # first configured language ends up first.
    for l2 in reversed(always_first_l2):
        position = search_matching(sorted_flags,
                                   lambda i, entry: entry[0] == l2)
        # -1 means "not present"; 0 means it is already at the front.
        if position > 0:
            sorted_flags.insert(0, sorted_flags.pop(position))
    # Group languages by file in a single pass instead of rescanning the
    # whole dictionary once per file. Iterating ``flags`` itself keeps the
    # languages of each rule in the dictionary's own order.
    languages_by_file = {}
    for language, file in flags.items():
        languages_by_file.setdefault(file, []).append(language)
    # Each file gets exactly one rule, positioned by its first appearance.
    file_list = deduplicate(file for _, file in sorted_flags)
    return "\n\n".join(make_css_rule(languages_by_file[file], file)
                       for file in file_list)
def get_flag_dict_from_css(css_text):
    """Parse previously generated CSS back into a ``{language: filename}`` dict.

    Selector matches are collected until a ``content:url(...)`` match is
    found; every collected language is then assigned that file name and the
    collection starts over for the next rule.
    """
    parsed_flags = {}
    pending_languages = []
    for found in re.finditer(css_language_filename_regex, css_text):
        selector_id, url_filename = found[1], found[2]
        if selector_id:
            # Undo the escaping applied when the selector was generated.
            restored_language = selector_id.replace("_", " ").replace(r"\'", "'")
            pending_languages.append(restored_language)
            continue
        if not url_filename:
            continue
        # Reverse the file name changes made when the URL was generated.
        restored_filename = url_filename.replace("_", " ").replace("%22", '"')
        for language in pending_languages:
            parsed_flags[language] = restored_filename
        pending_languages = []
    return parsed_flags
def get_flag_dict_diffs(old_flag_dict, new_flag_dict):
    """Compare two ``{language: filename}`` dicts.

    Returns a tuple ``(added, changed, removed)`` of dictionaries:
    ``added`` holds languages only in the new dict (with their new file),
    ``changed`` holds languages whose file differs (with their OLD file), and
    ``removed`` holds languages only in the old dict (with their old file).
    """
    added = {}
    for lang, filename in new_flag_dict.items():
        if lang not in old_flag_dict:
            added[lang] = filename
    changed = {}
    removed = {}
    for lang, filename in old_flag_dict.items():
        if lang not in new_flag_dict:
            removed[lang] = filename
        elif new_flag_dict[lang] != filename:
            changed[lang] = filename
    return (added, changed, removed)
def utf8_len(s):
    """Return the number of bytes ``s`` occupies when encoded as UTF-8."""
    encoded = s.encode('utf-8')
    return len(encoded)
def add_flag_change_summary(edit_summary, css_text, new_flag_dict):
    """Append a description of the flag changes to ``edit_summary``.

    The current CSS in ``css_text`` is parsed back into a flag dictionary and
    compared against ``new_flag_dict``. The returned summary lists the added,
    changed and removed languages; if nothing differs it says so, and if the
    full list would push the summary past 800 UTF-8 bytes a generic message
    is used instead.
    """
    old_flag_dict = get_flag_dict_from_css(css_text)
    # Compare against the dictionary that was passed in, not a module-level
    # global, so the function works for any caller.
    added, changed, removed = get_flag_dict_diffs(old_flag_dict, new_flag_dict)
    change_groups = (("add", added), ("change", changed), ("remove", removed))
    messages = [verb + " " + ", ".join(languages)
                for verb, languages in change_groups
                if languages]
    if not messages:
        return edit_summary + ": no changes identified"
    new_summary = edit_summary + ": " + "; ".join(messages)
    if utf8_len(new_summary) <= 800:
        return new_summary
    return edit_summary + ": too many changes to list"
# Main script: read the flag list, generate the CSS, and save it (or print it
# when debug_mode is set).
enwikt = Site("en", fam="wiktionary")
enwikt.login()
get_page = Page(enwikt, get_page_title)
get_page_revision_id = get_page.latest_revision_id
list_match = re.search(list_regex, get_page.text, re.DOTALL)
if list_match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("could not find the start/end list markers on [["
                     + get_page_title + "]]")
# Use the captured group so the marker comments themselves are not parsed as
# part of the first or last list entry.
list_of_langs = list_match.group(1)
# Lines that do not look like "language: filename [size]" are dropped.
flag_list = [convert_line_to_item(re.match(line_regex, line))
             for line in list_of_langs.splitlines()]
flag_list = [x for x in flag_list if x]
size_dict = {filename: size for language, filename, size in flag_list}
flag_dict = {language: filename for language, filename, size in flag_list}
css_header = css_header_template.format(
    get_page_title=get_page_title,
    get_page_revision_id=get_page_revision_id)
css = css_header + convert_flag_dict_to_css(flag_dict)
set_page = Page(enwikt, set_page_title)
# The summary is computed from the page's current text, before it is replaced.
edit_summary = add_flag_change_summary(edit_summary, set_page.text, flag_dict)
if debug_mode:
    print(css + "\n\n----\n\nsummary:\n" + edit_summary)
else:
    set_page.text = css
    set_page.save(summary=edit_summary, minor=False)