Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.
UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.
Python:Import-en.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This is a script written by Filnik using the import class.

It walks a category on the English Wiktionary, imports every listed page
that does not yet exist on the local wiki (through Special:Import, which
requires sysop rights), moves the imported page to its lower-case title
when needed, and rewrites a few English section headings into the local
templates.

Guide for Italian importers (translated from the original Italian; for
everyone else, the code is well commented ^_-)

I don't feel much like programming in a "super-cool" way, so you will have
to manage a bit on your own :P  In any case, if you want to change the page
to load from, change pageLoad = '' to whatever you want (remove the last '
and search for that); if instead you want to change the regex, search for
(precisely) "regex". For the rest, just put the file in the selected page
(and change the regex, if needed) and give it the ok. Difficult? :-)

The bot keeps a log, in Utente:Filbot/Log, used to later update a page with
all the imports done. So please do not comment out those lines!

NOTE(review): the guide above mentions a ``pageLoad`` variable and a log
page; neither appears in this version of the script.
"""
#
# (C) Filnik, 2007
#
# Greetings:
# Lorenzo Paulatto and Misza13
#
# Distributed under the terms of the MIT license.
#
# Version: 2.0
#

import re
import time
import urllib
import urllib2

import catlib
import config
import login
import wikipedia

# Global variables
site = wikipedia.getSite()

# Seconds to wait before retrying a failed import or a missing page.
RETRY_DELAY = 10

# [[Category:...]] / [[Categoria:...]] links (with an optional trailing
# newline) copied over from the source wiki.
_CATEGORY_LINK = re.compile(r'\[\[[Cc]ategor(ia|y):(.*?)\]\](\n)?')
# Section headings of any level. The '=' runs are matched greedily so the
# whole heading is consumed: the former ordered alternation
# "(==|=|===|...)" stopped its trailing group at "==", which turned
# "===Noun===" into "{{-noun-}}=".
_ITALIAN_HEADING = re.compile(r'=+ ?[Ii]talian ?=+')
_NOUN_HEADING = re.compile(r'=+ ?[nN]oun ?=+')

# ################################################################ #


def _local_title(title):
    """Return the title to use on the local wiki for *title*.

    The title is lower-cased and, if it contains brackets, the bracketed
    part is removed (together with what is inside).
    """
    local = title.lower()
    if '(' in local:
        local = re.sub(r'(.*?) ?\((.*?)\) ?(.*?)', r'\1\3', local)
    return local


def _is_category_title(title):
    """Return True if *title* looks like a category page title."""
    lowered = title.lower()
    return 'category' in lowered or 'categoria' in lowered


def _import_with_retry(importerbot, pagetoload):
    """Import *pagetoload*, retrying every RETRY_DELAY seconds on failure.

    Returns True once Import() stops reporting a failure and False when the
    page has to be skipped because Import() raised KeyError. A permanently
    failing import is retried without limit, as before.
    """
    importpage = pagetoload.encode(site.encoding())
    while True:
        wikipedia.output('Importing %s...' % pagetoload)
        try:
            result = importerbot.Import(importpage, prompt=False)
        except KeyError:
            # Skip the page. The previous code hit "continue" here, which
            # retried the very same page forever.
            wikipedia.output(u'Error! The page has a strange character! skip!')
            return False
        # Only an explicit False is a failure; Import() may also return
        # None, which has always been treated as "go on".
        if result is False:
            wikipedia.output(u'Sleeping for 10 seconds and retry!')
            time.sleep(RETRY_DELAY)
            continue
        return True


def _get_imported_text(newpag, pag, moved):
    """Return the wikitext of the freshly imported page *newpag*.

    One NoPage error is tolerated (followed by a RETRY_DELAY pause). After
    that, if the page was moved, the script assumes the move/delete sequence
    removed the real page instead of the redirect: it undeletes *newpag*,
    deletes the redirect *pag* again and keeps retrying. If no move
    happened, *pag* is the imported page itself, so the recovery must not
    run (it would delete the imported page); None is returned instead.
    """
    waited_once = False
    while True:
        try:
            return wikipedia.Page(site, newpag).get()
        except wikipedia.NoPage:
            if not waited_once:
                waited_once = True
                time.sleep(RETRY_DELAY)
                continue
            if not moved:
                wikipedia.output(newpag + u" was not found after the import... skip!")
                return None
            # This recovery isn't tested yet and may give errors, so be
            # careful ^_-  It may happen that something goes wrong and the
            # MediaWiki software deletes not the redirect but the page just
            # created. Undelete the page and delete the right one.
            wikipedia.output(u'Page Deleted! WARNING! BUG! Trying to solve the problem...')
            wikipedia.Page(site, newpag).undelete('Bot: Ripesco pagina cancellata per errore dal software mediawiki.')
            pag.delete('Bot: Cancello redirect inutile', False)


def _localize_text(text):
    """Convert imported English wikitext to the local conventions.

    Category links are dropped, the "Italian" and "Noun" headings become the
    {{-it-}} and {{-noun-}} templates, "Alternative forms" is translated and
    the {{Transfen}} marker is put on top.
    """
    text = _CATEGORY_LINK.sub(r'', text)
    text = _ITALIAN_HEADING.sub(r'{{-it-}}', text)
    text = _NOUN_HEADING.sub(r'{{-noun-}}', text)
    text = re.sub(r'[Aa]lternative forms', r'Altre forme', text)
    return '{{Transfen}}\n' + text


def main():
    """Import every page of the source category that is missing locally."""
    starter = None
    cat = u"Italian nouns"
    ensite = wikipedia.getSite('en', 'wiktionary')
    wikipedia.output(u'\t\t\t >> Start! <<')
    local_category_ns = site.namespace(14).lower()
    catitle = catlib.Category(ensite, 'Category:' + cat)
    for i in catitle._parseCategory(False, False, starter):
        listed = i[-1].lower()
        # Same filter as before: an entry is dropped only when it contains
        # both the local category namespace name and 'category'.
        if local_category_ns in listed and 'category' in listed:
            continue
        pagetoload = i[1]
        newpag = _local_title(pagetoload)
        # Check that neither the original nor the normalised title exists.
        if (wikipedia.Page(site, pagetoload).exists()
                or wikipedia.Page(site, newpag).exists()):
            wikipedia.output(newpag + u" is already in wiktionary... skip!")
            continue
        # Category-like titles are never imported. This test used to sit
        # inside the retry loop, where its "continue" spun forever.
        if _is_category_title(pagetoload):
            continue
        importerbot = Importer(site)  # Initializing the bot
        if not _import_with_retry(importerbot, pagetoload):
            continue
        pag = wikipedia.Page(site, pagetoload)
        moved = pagetoload != newpag
        if moved:
            # "== False" on purpose: a None result is not a failure.
            if pag.move(newpag, reason='Bot: Sposto pagina da Maiuscola a minuscola') == False:
                break
            wikipedia.output(u'Page moved successfully!')
            if pag.delete('Bot: Cancello redirect inutile', False) == False:
                break
        netxt = _get_imported_text(newpag, pag, moved)
        if netxt is None:
            continue
        wikipedia.Page(site, newpag).put(_localize_text(netxt), 'Bot: fixes vari')


# <----------------------------OFF LIMITS!--------------------------------->
# System functions follow: no changes should be necessary!
# <----------------------------OFF LIMITS!--------------------------------->


class Importer(wikipedia.Page):
    """Page object bound to Special:Import, able to run interwiki imports."""

    def __init__(self, site):
        wikipedia.Page.__init__(self, site, 'Special:Import', None, 0)

    def Import(self, target, project='en', crono='1', namespace='', prompt=True):
        """Import the page from the wiki. Requires administrator status.

        target    -- title of the page to import, encoded in the site
                     encoding (it is decoded again for the existence check).
        project   -- interwiki prefix of the wiki to import from.
        crono     -- '1'/True to import the whole history, '0'/False for the
                     last revision only.
        namespace -- destination namespace; '' and '0' both mean "default".
        prompt    -- if True, ask the user before importing the page.

        Returns True if the page exists after the import and False if it
        does not (the server answer is then dumped to print.txt). Returns
        None when the user declines or when no answer body is available
        (authenticated-proxy branch).
        """
        # Fixing the crono value ("==" on purpose, so 1/0 are accepted too).
        if crono == True:
            crono = '1'
        elif crono == False:
            crono = '0'
        elif crono not in ('0', '1'):
            # Behaviour kept: complain and go on with the value as given.
            wikipedia.output(u'Crono value, wrongly setted.')
            wikipedia.stopme()
        # Fixing namespace's value (this used to be a no-op comparison).
        if namespace == '0':
            namespace = ''
        answer = 'y'
        if prompt:
            answer = wikipedia.inputChoice(u'Do you want to import %s?' % target, ['Yes', 'No'], ['y', 'N'], 'N')
        if answer not in ['y', 'Y']:
            return None

        mysite = self.site()
        host = mysite.hostname()
        address = '/w/index.php?title=%s&action=submit' % self.urlname()
        # You need to be a sysop for the import.
        mysite.forceLogin(sysop=True)
        # Getting the token.
        # NOTE(review): the token is fetched but never sent with the form;
        # confirm whether the target MediaWiki version needs it.
        token = mysite.getToken(self, sysop=True)
        # Defining the predata.
        predata = {
            'action': 'submit',
            'source': 'interwiki',
            # From what project do you want to import the page?
            'interwiki': project,
            # What is the page that you want to import?
            'frompage': target,
            # The entire history... or not?
            'interwikiHistory': crono,
            # What namespace do you want? (was hard-coded to '')
            'namespace': namespace,
        }
        time.sleep(8)
        if host in config.authenticate:
            predata['Content-type'] = 'application/x-www-form-urlencoded'
            # NOTE(review): assumes the framework exposes its user agent as
            # wikipedia.useragent; the bare name used before was undefined.
            predata['User-agent'] = wikipedia.useragent
            data = mysite.urlEncode(predata)
            urllib2.urlopen(urllib2.Request('http://' + host + address, data))
            # The answer body is not read in this branch, so the result
            # cannot be checked.
            return None

        response, data = mysite.postForm(address, predata, sysop=True)
        if not data:
            return None
        wikipedia.output(u'Page imported, checking...')
        time.sleep(2)
        if wikipedia.Page(mysite, target.decode(mysite.encoding())).exists():
            wikipedia.output(u'Import success!')
            return True
        wikipedia.output(u'Import failed!')
        # Keep the server answer for debugging; always close the file.
        rock = open('print.txt', 'w')
        try:
            rock.write(data.encode('utf-8'))
        finally:
            rock.close()
        return False


if __name__ == '__main__':
    try:
        main()
    finally:
        wikipedia.stopme()