Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Import-en.py

From Botwiki
Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
This is a script written by Filnik using the import class.
 
 
Guide for Italian importers (for everyone else, the code is well commented ^_-)
 
Allora non ho molta voglia di programmare in modo "super-figo" quindi vi dovete
un po' arrangiare :P in ogni caso, se volete cambiare la pagina da cui caricare
modificate pageLoad = '' con quello che volete (togliete l'ultimo ' e cercate quello)
se invece volete cambiare regex, cercate (appunto) regex. Per il resto, basta
mettere il file nella pagina selezionato (e cambiare la regex, se serve)
e dargli ok. Difficile? :-) Il bot tiene un log, in Utente:Filbot/Log per aggiornare
poi una pagina con tutti gli import fatti. Quindi non commentate quelle linee, pls!
 
"""
#
# (C) Filnik, 2007
#
# Greetings:
# Lorenzo Paulatto and Misza13
#
# Distributed under the terms of the MIT license.
#
# Version: 2.0
#
 
import wikipedia, re, time
import catlib
 
# Global variables
# Site object returned by wikipedia.getSite() called with no arguments. The
# script uses it as the wiki that pages are imported into, moved on and saved
# to (presumably the site configured in user-config.py -- verify).
site = wikipedia.getSite()
# ################################################################ #
def main():
    """Import every page of an en.wiktionary category into the local wiki.

    For each member of the English Wiktionary category named by ``cat``:

    1. category pages are skipped, as are pages that already exist locally
       under either the original title or the normalised one;
    2. the page is imported through ``Importer`` (sysop rights required),
       retrying every 10 seconds while the import reports failure;
    3. if the normalised title (lower case, bracketed part removed) differs
       from the original one, the page is moved there and the redirect left
       behind is deleted; a failed move or delete aborts the whole run;
    4. categories and a few English headings are rewritten into the local
       templates and the page is saved.
    """
    starter = None
    cat = u"Italian nouns"
    ensite = wikipedia.getSite('en', 'wiktionary')
    wikipedia.output(u'\t\t\t  >> Start! <<')
    local_cat_ns = site.namespace(14).lower()
    catitle = catlib.Category(ensite, 'Category:' + cat)
    for i in catitle._parseCategory(False, False, starter):
        pagetoload = i[1]
        # Skip category pages up front. Previously a rejected entry left
        # pagetoload unbound (or holding the previous title), and category
        # titles that slipped through made the import loop spin forever.
        if (local_cat_ns in i[-1].lower() or 'category' in i[-1].lower()
                or 'category' in pagetoload.lower()
                or 'categoria' in pagetoload.lower()):
            continue
        oldpag = wikipedia.Page(site, pagetoload)
        newpag = pagetoload.lower()
        # Searching if the name of the page has brackets (so the bot will
        # delete them and what is inside)
        if '(' in newpag:
            newpag = re.sub(r'(.*?) ?\((.*?)\) ?(.*?)', r'\1\3', newpag)
        oldpag2 = wikipedia.Page(site, newpag)
        # Check that the page doesn't exist yet under either title.
        if oldpag.exists() or oldpag2.exists():
            wikipedia.output(newpag + u" is already in wiktionary... skip!")
            continue

        importerbot = Importer(site)  # Initializing the bot
        importpage = pagetoload.encode(site.encoding())
        imported = True
        while 1:
            wikipedia.output('Importing %s...' % pagetoload)
            try:
                result = importerbot.Import(importpage, prompt=False)
            except KeyError:
                # Retrying the same title would raise again, so give up on
                # this page instead of looping forever.
                wikipedia.output(u'Error! The page has a strange character! skip!')
                imported = False
                break
            if result == False:
                wikipedia.output(u'Sleeping for 10 seconds and retry!')
                time.sleep(10)
                continue
            break
        if not imported:
            continue

        pag = wikipedia.Page(site, pagetoload)
        moved = pagetoload != newpag
        if moved:
            result1 = pag.move(newpag, reason='Bot: Sposto pagina da Maiuscola a minuscola')
            if result1 == False:
                # Abort the whole run; the caller's ``finally`` calls stopme().
                break
            wikipedia.output(u'Page moved successfully!')
            result2 = pag.delete('Bot: Cancello redirect inutile', False)
            if result2 == False:
                break

        # This block isn't tested yet and may give errors so, be careful ^_-
        # It may happen that something (i really don't know what) goes wrong
        # and the MediaWiki software deletes the newly created page instead
        # of the redirect. This block undeletes the page and deletes the
        # right one. Only one recovery attempt is made, so a page that stays
        # missing cannot stall the bot.
        waited = False
        recovered = False
        netxt = None
        while 1:
            try:
                netxt = wikipedia.Page(site, newpag).get()
                break
            except wikipedia.NoPage:
                if not waited:
                    # Give the server some time before assuming a problem.
                    waited = True
                    time.sleep(10)
                    continue
                if recovered:
                    wikipedia.output(u'Page %s is still missing after the recovery attempt... skip!' % newpag)
                    break
                recovered = True
                wikipedia.output(u'Page Deleted! WARNING! BUG! Trying to solve the problem...')
                wikipedia.Page(site, newpag).undelete('Bot: Ripesco pagina cancellata per errore dal software mediawiki.')
                if moved:
                    # Only delete the old title when it really is a separate
                    # redirect; otherwise ``pag`` is the target page itself.
                    pag.delete('Bot: Cancello redirect inutile', False)
        if netxt is None:
            continue

        # Deleting the categories that are in the wiktionary page...
        netxt = re.sub(r'\[\[[Cc]ategor(ia|y):(.*?)\]\](\n)?', r'', netxt)
        # ``=+`` consumes the whole heading marker whatever its level; the
        # former alternation could leave stray '=' characters behind.
        netxt = re.sub(r'=+ ?[Ii]talian ?=+', r'{{-it-}}', netxt)
        netxt = re.sub(r'=+ ?[nN]oun ?=+', r'{{-noun-}}', netxt)
        netxt = re.sub(r'[Aa]lternative forms', r'Altre forme', netxt)
        netxt = '{{Transfen}}\n' + netxt
        wikipedia.Page(site, newpag).put(netxt, 'Bot: fixes vari')
 
#<----------------------------OFF LIMITS!--------------------------------->#          
"""
****************************************************************************
System functions follow: no changes should be necessary!
****************************************************************************
"""
#<----------------------------OFF LIMITS!--------------------------------->#
 
import urllib
import login, config
 
class Importer(wikipedia.Page):
    """Page object bound to ``Special:Import``, used to transwiki-import pages."""

    def __init__(self, site):
        """Create the ``Special:Import`` page object on *site*."""
        wikipedia.Page.__init__(self, site, 'Special:Import', None, 0)

    def Import(self, target, project = 'en', crono = '1', namespace = '', prompt = True):
        """Import the page from the wiki. Requires administrator status.

        Parameters:
            target    -- title of the page to import, already encoded with
                         the site encoding (it is decoded again for the
                         existence check).
            project   -- interwiki prefix of the wiki to import from.
            crono     -- '1'/True to import the whole history, '0'/False to
                         import only the current revision.
            namespace -- value sent in the form's ``namespace`` field;
                         '0' is sent as ''.
            prompt    -- if True, ask the user for confirmation first.

        Returns True when the imported page exists afterwards, False when it
        does not, and None when the user declined or no response data was
        available to check (this includes the ``config.authenticate``
        branch, which discards the response body).

        Raises ValueError when *crono* is not one of the accepted values.
        """
        # Fixing the crono value...
        if crono == True:
            crono = '1'
        elif crono == False:
            crono = '0'
        elif crono not in ('0', '1'):
            wikipedia.output(u'Crono value, wrongly setted.')
            wikipedia.stopme()
            # Do not go on and submit the form with an invalid value.
            raise ValueError('crono must be True, False, "0" or "1"')
        # Fixing namespace's value (this used to be a no-op comparison).
        if namespace == '0':
            namespace = ''
        answer = 'y'
        if prompt:
            answer = wikipedia.inputChoice(u'Do you want to import %s?' % target, ['Yes', 'No'], ['y', 'N'], 'N')
        if answer not in ['y', 'Y']:
            return None

        target_site = self.site()
        address = '/w/index.php?title=%s&action=submit' % self.urlname()
        # You need to be a sysop for the import.
        target_site.forceLogin(sysop = True)
        # Getting the token.
        # NOTE(review): the token is fetched but never added to predata,
        # exactly as in the original code -- confirm whether it is needed.
        token = target_site.getToken(self, sysop = True)
        # Defining the predata.
        predata = {
            'action' : 'submit',
            'source' : 'interwiki',
            # From what project do you want to import the page?
            'interwiki' : project,
            # What is the page that you want to import?
            'frompage' : target,
            # The entire history... or not?
            'interwikiHistory' : crono,
            # What namespace do you want?
            'namespace': namespace,
        }
        time.sleep(8)
        if target_site.hostname() in config.authenticate.keys():
            # urllib2 was never imported at module level (only urllib is).
            import urllib2
            predata['Content-type'] = 'application/x-www-form-urlencoded'
            # ``useragent`` was an undefined name here. Assumes the wikipedia
            # module exposes it at module level -- TODO confirm; the fallback
            # string is used otherwise.
            predata['User-agent'] = getattr(wikipedia, 'useragent', 'PythonWikipediaBot/1.0')
            data = target_site.urlEncode(predata)
            response = urllib2.urlopen(urllib2.Request('http://' + target_site.hostname() + address, data))
            data = u''
        else:
            response, data = target_site.postForm(address, predata, sysop = True)
        if not data:
            return None

        wikipedia.output(u'Page imported, checking...')
        time.sleep(2)
        if wikipedia.Page(target_site, target.decode(target_site.encoding())).exists():
            wikipedia.output(u'Import success!')
            return True
        wikipedia.output(u'Import failed!')
        # Keep the server answer for debugging and make sure the file is
        # closed even if the write fails.
        rock = open('print.txt', 'w')
        try:
            rock.write(data.encode('utf-8'))
        finally:
            rock.close()
        return False
# Run the bot only when this file is executed as a script. wikipedia.stopme()
# is always called on the way out, even when main() raises.
if __name__=='__main__':
    try:
        main()
    finally:
        wikipedia.stopme()
Personal tools
Share