User:Flubot/Adding DEFAULTSORT key to Greek words

This script is an adaptation of the one used on el.wiktionary to add sorting keys to Greek words. It adds the DEFAULTSORT key before the interwikis.

defaultsort.py

edit
# -*- coding: utf-8  -*-
u"""

Much of this code was stolen from cosmetic_changes.py.

"""
__version__ = '$Id: default_kleidaTaksinomhshs.py 4260 2007-09-12 22:12:11Z wikipedian $'
import wikipedia, pagegenerators, string
import sys
import re

# Caution text. It is shown as part of the confirmation prompt in main()
# before any page is edited, and is also exposed through docuReplacements.
warning = u"""If you are running this bot outside
en.wiktionary, please reconsider"""

# Maps documentation placeholders to their replacement text.
# NOTE(review): presumably consumed by the framework's help output
# (wikipedia.showHelp) -- the consumer is not visible in this file.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
    '&warning;': warning,
}

# Edit summary used when this module runs as a stand-alone script, keyed by
# language code; the bot class resolves it through wikipedia.translate().
msg_standalone = {
    'en': u'Bot: defaultsort key',
}

class EpiloghKleidaTaksinomhshsToolkit:
    """
    Builds the {{DEFAULTSORT:...}} template for a Greek page title and
    places it in the wikitext right before the interwiki links.

    The sort key is the lower-cased title with every accented, aspirated or
    otherwise decorated Greek letter replaced by its plain base letter, final
    sigma replaced by a regular sigma, and every character that is not a
    plain lower-case Greek letter dropped.
    """

    # Regex sources are written with doubled backslashes so that the string
    # literals contain no unrecognised escape sequences; the resulting
    # patterns are identical to the historical ones.
    _DEFAULTSORT_RE = re.compile(u'\\{\\{DEFAULTSORT\\:([^\\}]*)\\}\\}')

    # Applied in this order when dropping an existing template: first the
    # template together with the blank line following it (or at the very end
    # of the text), then a template followed by a single line break.
    _REMOVE_PATTERNS = (
        u'\\{\\{DEFAULTSORT\\:[^\\}]*\\}\\}(\r\n\r\n|$)',
        u'\\{\\{DEFAULTSORT\\:[^\\}]*\\}\\}\r\n',
    )

    # Anything that is not a plain lower-case Greek letter.
    _NON_GREEK_RE = re.compile(u'[^αβγδεζηθικλμνξοπρστυφχψω]')

    # Space-separated source characters and, position by position, the plain
    # letters they are replaced with.
    _MTG_APO = (
        u'ά έ ή ί ϊ ΐ ό ύ ϋ ΰ ώ ς ά έ ή ί ό ύ ώ ᾴ ῄ ῴ ὰ ὲ ὴ ὶ ὸ ὺ ὼ ᾲ ῂ ῲ ᾶ ῆ ῖ ῦ ῶ ᾷ ῇ ῷ ῗ ῧ ῒ ῢ ΐ ΰ ᾳ ῃ ῳ ἀ ἐ ἠ'
        u' ἰ ὀ ὐ ὠ ᾀ ᾐ ᾠ ἄ ἔ ἤ ἴ ὄ ὔ ὤ ᾄ ᾔ ᾤ ἂ ἒ ἢ ἲ ὂ ὒ ὢ ᾂ ᾒ ᾢ ἆ ἦ ἶ ὖ ὦ ᾆ ᾖ ᾦ ἁ ἑ ἡ ἱ ὁ'
        u' ὑ ὡ ἅ ἕ ἥ ἵ ὅ ὕ ὥ ᾅ ᾕ ᾥ ἃ ἓ ἣ ἳ ὃ ὓ ὣ ᾃ ᾓ ᾣ ἇ ἧ ἷ ὗ ὧ ᾇ ᾗ ᾧ ᾰ ῐ ῠ ᾱ ῑ ῡ ῥ'
    )
    _MTG_SE = (
        u'α ε η ι ι ι ο υ υ υ ω σ α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω ι υ ι υ ι υ α η ω α ε η'
        u' ι ο υ ω α η ω α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω α ε η ι ο'
        u' υ ω α ε η ι ο υ ω α η ω α ε η ι ο υ ω α η ω α η ι υ ω α η ω α ι υ α ι υ ρ'
    )

    # Code point -> replacement letter, in the form unicode.translate expects.
    # Built once instead of on every call.
    _TRTABLE = dict(zip(map(ord, _MTG_APO.split(u' ')), _MTG_SE.split(u' ')))

    def __init__(self, site, title, exceptions=None, debug=False):
        """
        site       -- site object used for the interwiki handling.
        title      -- the page title, either as a zero-argument callable
                      returning it (e.g. the bound method page.title) or as
                      a plain string.
        exceptions -- stored on the instance; a fresh list is created when
                      omitted so instances never share one default list.
        debug      -- when true, allagh() shows a diff of its changes.
        """
        self.site = site
        self.debug = debug
        self.title = title
        self.exceptions = [] if exceptions is None else exceptions

    def _kleida(self):
        """Return the bare sort key derived from the page title."""
        titlos = self.title() if callable(self.title) else self.title
        kleida = titlos.lower().translate(self._TRTABLE)
        return self._NON_GREEK_RE.sub(u'', kleida)

    def allagh(self, keimeno):
        """
        Given a wiki source code text, return it with an up-to-date
        DEFAULTSORT template.

        The text is returned unchanged when the title yields no sort key at
        all (it contains no Greek letters), or when the first DEFAULTSORT
        template already in the text carries exactly the generated key.
        Otherwise any existing template is removed and the generated one is
        inserted before the interwiki links.
        """
        kleida = self._kleida()
        if not kleida:
            # An empty {{DEFAULTSORT:}} is useless and would also overwrite
            # a key somebody set by hand, so leave such pages alone.
            return keimeno

        yparxon = self._DEFAULTSORT_RE.search(keimeno)
        if yparxon and yparxon.group(1) == kleida:
            return keimeno

        palioKeimeno = keimeno
        # Remove the old template, together with the blank line that follows
        # it if there is one.
        for protyporegexp in self._REMOVE_PATTERNS:
            keimeno = self.removeKleidaTaksinomhshsProtypo(keimeno,
                                                           protyporegexp)
        keimeno = self.addKleidaTaksinomhshsProtypo(
            keimeno, self.paragwghKleidaTaksinomhshsProtypo())
        if self.debug:
            wikipedia.showDiff(palioKeimeno, keimeno)
        return keimeno

    def paragwghKleidaTaksinomhshsProtypo(self):
        """Return the complete {{DEFAULTSORT:key}} template for the title."""
        return u'{{DEFAULTSORT:' + self._kleida() + u'}}'

    def removeKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        u"""
        Return keimeno with every match of the regular expression protypo
        removed (used to drop an old template if it is there).
        """
        return re.sub(protypo, u'', keimeno)

    def addKleidaTaksinomhshsProtypo(self, keimeno, protypo):
        u"""
        Stuff the provided protypo into the wikitext right before the
        interwiki links.

        The language links are taken out of the stripped text, the template
        is appended surrounded by the site family's category text separator,
        and the language links are put back afterwards.
        """
        site = self.site
        separator = site.family.category_text_separator
        interwiki = wikipedia.getLanguageLinks(keimeno, insite=site)
        textnoiws = (wikipedia.removeLanguageLinks(keimeno.strip(), site=site)
                     + separator + protypo + separator)
        return wikipedia.replaceLanguageLinks(textnoiws, interwiki, site=site)

class EpiloghKleidaTaksinomhshsBot:
    """
    Runs EpiloghKleidaTaksinomhshsToolkit over every page of a generator and
    saves the result, asking for confirmation unless told to accept all.
    """

    def __init__(self, generator, exceptions=None, acceptall=False):
        """
        generator  -- iterable of page objects to work on.
        exceptions -- compiled regular expressions; a page whose text matches
                      any of them is skipped. A fresh list is created when
                      omitted so instances never share one default list.
        acceptall  -- when true, changes are saved without asking.
        """
        self.generator = generator
        self.acceptall = acceptall
        self.exceptions = [] if exceptions is None else exceptions
        # Load default summary message.
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg_standalone))

    def checkExceptions(self, original_text):
        """
        If one of the exceptions applies for the given text, returns the
        substring which matches the exception. Otherwise it returns None.
        """
        for exception in self.exceptions:
            hit = exception.search(original_text)
            if hit:
                return hit.group(0)
        return None

    def treat(self, page):
        """
        Process a single page: skip it if an exception matches, otherwise
        compute the new text and save it when it differs and the change is
        accepted. Missing, redirect and locked pages are reported and
        skipped instead of aborting the run.
        """
        try:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
            # The toolkit receives the bound method page.title and calls it
            # itself when it needs the title.
            ccToolkit = EpiloghKleidaTaksinomhshsToolkit(page.site(), page.title, debug = True)
            keimeno = page.get()
            match = self.checkExceptions(keimeno)
            # skip all pages that contain certain texts
            if match:
                wikipedia.output(u'Skipping %s because it contains %s' % (page.aslink(), match))
            else:
                allages = ccToolkit.allagh(keimeno)
                if allages != keimeno:
                    # Bound up front so the check below never depends on
                    # short-circuit evaluation to avoid an unbound name.
                    choice = None
                    if not self.acceptall:
                        choice = wikipedia.inputChoice(u'Do you want to accept these changes?',  ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                        if choice in ['a', 'A']:
                            # "All": stop asking for the rest of the run.
                            self.acceptall = True
                    if self.acceptall or choice in ['y', 'Y']:
                        page.put(allages)
                else:
                    wikipedia.output(u"No changes for %s" % page.title())
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist;!" % page.aslink())
        except wikipedia.IsRedirectPage:
            wikipedia.output("Page %s is a redirect, skipping." % page.aslink())
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked?!" % page.aslink())

    def run(self):
        """Treat every page delivered by the generator."""
        for page in self.generator:
            # treat() does all page access itself and handles the resulting
            # errors; fetching the text again here would let NoPage or
            # IsRedirectPage escape and abort the whole run.
            self.treat(page)

def main():
    """
    Command line entry point.

    Understands -except:TEXT (repeatable), -regex, -nocase, -namespace:NS
    (repeatable) and -page[:TITLE]; every other argument is handed to the
    shared page generator factory. Explicitly named pages take precedence
    over a generator chosen through the factory. After the user confirms
    the warning, the bot is run over the selected pages.
    """
    page_source = None
    titles = []
    skip_texts = []
    wanted_namespaces = []
    treat_as_regex = False
    ignore_case = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    factory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-except:'):
            skip_texts.append(arg[8:])
            continue
        if arg == '-regex':
            treat_as_regex = True
            continue
        if arg == '-nocase':
            ignore_case = True
            continue
        if arg.startswith('-namespace:'):
            value = arg[11:]
            # Namespaces may be given by number or by name.
            try:
                value = int(value)
            except ValueError:
                pass
            wanted_namespaces.append(value)
            continue
        if arg.startswith('-page'):
            if len(arg) == 5:
                # Bare "-page": ask for the title interactively.
                titles.append(wikipedia.input(u'Which page do you want to change?'))
            else:
                titles.append(arg[6:])
            continue
        candidate = factory.handleArg(arg)
        if candidate:
            page_source = candidate
        else:
            wikipedia.showHelp()

    # Turn the collected exception texts into compiled patterns; plain texts
    # are escaped so they match literally.
    flags = (re.UNICODE | re.IGNORECASE) if ignore_case else re.UNICODE
    compiled_exceptions = []
    for text in skip_texts:
        if not treat_as_regex:
            text = re.escape(text)
        compiled_exceptions.append(re.compile(text, flags))

    if titles:
        named_pages = [wikipedia.Page(wikipedia.getSite(), name) for name in titles]
        page_source = iter(named_pages)

    if not page_source:
        wikipedia.showHelp()
        return

    answer = wikipedia.inputChoice(warning + u'\nDo you want to continue?', ['yes', 'no'], ['y', 'N'], 'N')
    if answer != 'y':
        return

    if wanted_namespaces:
        page_source = pagegenerators.NamespaceFilterPageGenerator(page_source, wanted_namespaces)
    preloaded = pagegenerators.PreloadingGenerator(page_source)
    EpiloghKleidaTaksinomhshsBot(preloaded, compiled_exceptions).run()

# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returned normally or raised.
        # NOTE(review): stopme() is presumably the framework's shutdown hook
        # -- its effect is not visible in this file.
        wikipedia.stopme()