User:Flubot/cedillaBot

Run with:

python cedillaBot.py dușmăni (to edit only this page) ... or
python cedillaBot.py -cat:"Romanian nouns" (to edit pages in a category) ... or
python cedillaBot.py -file:listofwords (to retrieve page names from a list)

cedillaBot.py edit

#!/usr/bin/python
# -*- coding: utf-8 -*-
 
import wikipedia, pagegenerators, catlib
import re

class cedillabot:
    """Replace cedilla letters with comma-below letters in Romanian entries.

    The bot walks the pages produced by ``generator``, rewrites the text of
    each page's ``==Romanian==`` section (and of any ``{{term|...|lang=ro}}``
    template anywhere on the page), shows the diff and saves the page.

    Parameters:
        generator: iterable of page objects to process.
        site: site object used when stripping/re-adding interlanguage links.
        debug_bul: when true, ask for confirmation before every save; the
            answer "all" switches confirmation off for the rest of the run.
    """

    # Level-2 header that opens the Romanian section.
    _ROMANIAN_HEADER = re.compile('==Romanian==')
    # Any level-2 header (a third leading "=" makes the pattern fail).
    _LANGUAGE_HEADER = re.compile('^==([^=]+)==')
    _ETYMOLOGY_HEADER = re.compile('===Etymology===')
    _OTHER_HEADER = re.compile('^===(.*)===$')
    _RO_TERM = re.compile(r'\{\{term\|(.*)\|lang=ro\}\}')
    # A line matching any of these is treated as talking about Turkish.
    _TURKISH_MARKERS = (
        re.compile('Turkish'),
        re.compile('lang=tr'),
        re.compile(r'\{\{etyl\|tr\|'),
    )

    # (cedilla form, comma-below form).  Escapes are used on purpose: the
    # two forms look identical in most fonts and are easy to mix up.
    _T_FIXES = (
        (u'\u0163', u'\u021b'),  # t-cedilla -> t-comma
        (u'\u0162', u'\u021a'),  # T-cedilla -> T-comma
    )
    _S_FIXES = (
        (u'\u015f', u'\u0219'),  # s-cedilla -> s-comma
        (u'\u015e', u'\u0218'),  # S-cedilla -> S-comma
    )

    def __init__(self, generator, site, debug_bul = True):
        self.generator = generator
        self.debug_bul = debug_bul
        self.site = site

    @staticmethod
    def _substitute(text, pairs):
        """Return *text* after applying every (old, new) replacement in *pairs*."""
        for old, new in pairs:
            text = text.replace(old, new)
        return text

    def _convert_text(self, text):
        """Return *text* with Romanian cedilla letters turned into comma letters.

        The text is scanned line by line:

        * Inside the ``==Romanian==`` section t-cedilla is always replaced.
        * s-cedilla is replaced as well, except on the lines that directly
          follow an ``===Etymology===`` header for as long as they mention
          Turkish (Turkish legitimately uses s-cedilla).
        * Any other level-2 header ends the Romanian section.
        * The word inside ``{{term|...|lang=ro}}`` is converted wherever the
          template appears, regardless of the current section.
        """
        in_romanian = False
        keep_s_cedilla = False
        converted = []
        for line in text.split("\n"):
            if self._ROMANIAN_HEADER.search(line):
                in_romanian = True
                keep_s_cedilla = False
            elif self._LANGUAGE_HEADER.search(line):
                # Must be tested before the "no Turkish mention" branch,
                # otherwise ordinary language headers never end the
                # Romanian section and later languages get rewritten too.
                in_romanian = False
                keep_s_cedilla = False
            elif self._ETYMOLOGY_HEADER.search(line):
                keep_s_cedilla = True
            elif self._OTHER_HEADER.search(line):
                keep_s_cedilla = False
            elif not any(p.search(line) for p in self._TURKISH_MARKERS):
                keep_s_cedilla = False

            if in_romanian:
                line = self._substitute(line, self._T_FIXES)
                if not keep_s_cedilla:
                    line = self._substitute(line, self._S_FIXES)

            term = self._RO_TERM.search(line)
            if term:
                word = term.group(1)
                fixed = self._substitute(word, self._T_FIXES + self._S_FIXES)
                line = line.replace(word, fixed)

            converted.append(line)
        return "\n".join(converted)

    def run(self):
        """Process every page of the generator; always returns 0.

        Pages that do not exist are skipped.  Save errors for redirects,
        edit conflicts and spam-blacklist hits are reported and skipped.
        """
        site = self.site
        wikipedia.setAction(u'bot:changing cedillas with commas in Romanian entries')

        for page in self.generator:
            wikipedia.output(page.title())
            try:
                original = page.get(get_redirect=True)
            except wikipedia.NoPage:
                wikipedia.output(u'Skipping %s because it does not exist' % (page.title()))
                continue

            # Interlanguage links are taken off while editing and put back
            # afterwards so they are never touched by the conversion.
            interwiki = wikipedia.getLanguageLinks(original, insite = site)
            textnoiws = wikipedia.removeLanguageLinks(original.strip(), site = site) + site.family.category_text_separator

            converted = self._convert_text(textnoiws)
            if converted == textnoiws:
                continue
            new_text = wikipedia.replaceLanguageLinks(converted, interwiki, site = site)

            # Show the title of the page we're working on, highlighted in
            # purple, followed by what is going to change.
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
            wikipedia.showDiff(original, new_text)

            # Without confirmation mode every change is accepted.
            choice = 'y'
            if self.debug_bul:
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['yes', 'No', 'all'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    choice = 'y'
                    self.debug_bul = False
                elif choice != 'y':
                    choice = 'n'
            if choice != 'y':
                continue

            try:
                page.put(new_text)
            except wikipedia.IsRedirectPage:
                wikipedia.output(u'Skipping %s because it is a redirect' % (page.title()))
            except wikipedia.EditConflict:
                wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
            except wikipedia.SpamfilterError as error:
                wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
        return 0

          
def main():
    """Build a page generator from the command line and run the bot on it.

    Recognised arguments:
      -file:NAME  read page titles from the file NAME
      -cat:NAME   work on the pages of category NAME
      anything else is collected and joined with spaces into one page title,
      which takes precedence over -file/-cat.

    When no pages are selected the help text is shown instead.
    """
    file_prefix = '-file:'
    cat_prefix = '-cat:'

    generator = None
    title_words = []
    for argument in wikipedia.handleArgs():
        if not argument:
            continue
        if argument.startswith(file_prefix):
            generator = pagegenerators.TextfilePageGenerator(argument[len(file_prefix):])
        elif argument.startswith(cat_prefix):
            category = catlib.Category(wikipedia.getSite(), argument[len(cat_prefix):])
            generator = pagegenerators.CategorizedPageGenerator(category)
        else:
            title_words.append(argument)

    # An explicit page title overrides any generator chosen above.
    if title_words:
        single_page = wikipedia.Page(wikipedia.getSite(), ' '.join(title_words))
        generator = iter([single_page])

    if not generator:
        wikipedia.showHelp('touch')
        return

    preloaded = pagegenerators.PreloadingGenerator(generator)
    cedillabot(preloaded, wikipedia.getSite()).run()

# Script entry point: run the bot only when executed directly, not on import.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always call the framework's stopme(), even when main() raised.
        wikipedia.stopme()