Python:Diffbot.py
From Botwiki
This is a simple bot that reads the recent changes of a wiki and prints the diff between the current version and the last version made by a different user. If the page has just been created, or there are no previous users in its history, the bot prints the page content instead. Of course, it has little utility on its own, because there are many more sophisticated bots that print the diffs of all changes to a wiki, but it is a good starting point for building more sophisticated automatic bots: simply check the differences between the two contents and add page.put(oldcontent, 'BOT reverting changes.') if the edit seems to be vandalism.
NOTE: If you have an old version of pywikipedia, you will need to upgrade it before using this bot. The function getOldVersion() was added recently.
# Diffbot: follow the Wikimedia recent-changes IRC feed and, for every edit,
# print the diff between the current text and the last revision made by a
# different user (or the full text when no such revision exists).
#
# NOTE: this script targets Python 2 (it relies on the ``thread`` module and
# on byte-string sockets) and on the pywikipedia ``wikipedia`` module.

import random
import re
import socket
import thread

import wikipedia

######## IRC CONFIGURATION ########
server = 'irc.wikimedia.org'
port = 6667
nickname = 'rc'
channels = ['en.wikipedia', 'en.wiktionary', 'meta.wikimedia']
###################################

# Channels whose name does not follow the '#<code>.<family>' convention,
# mapped to the (code, family) pair passed to wikipedia.getSite().
_SPECIAL_SITES = {
    '#meta.wikimedia': ('meta', 'meta'),
    '#commons.wikimedia': ('commons', 'commons'),
    '#incubator.wikimedia': ('incubator', 'incubator'),
    '#species.wikipedia': ('species', 'species'),
    '#mediawiki.wikipedia': ('mediawiki', 'mediawiki'),
}


def _site_for_channel(channel):
    """Return the wiki site that the IRC *channel* reports changes for.

    Raises IndexError when the channel name has no '.<family>' part; any
    ValueError raised by wikipedia.getSite() is propagated to the caller.
    """
    special = _SPECIAL_SITES.get(channel)
    if special is not None:
        return wikipedia.getSite(*special)
    # '#en.wikipedia' -> code 'en', family 'wikipedia'
    parts = channel[1:].split('.')
    return wikipedia.getSite(parts[0], parts[1])


def _last_version_by_other_user(history, user):
    """Return the first entry of *history* not authored by *user*, or None.

    Each history entry is indexed as entry[0] = revision id and
    entry[2] = user name, which is how check() consumes it.
    """
    for version in history:
        if version[2] != user:
            return version
    return None


def check(data):
    """Print the diff (or the full text) for one recent-changes message.

    data -- dict produced by one of the ``regexps`` patterns.  The keys
            'channel', 'title' and 'user' are always present; 'diff' is
            only present for messages matched by the first pattern.
    """
    try:
        site = _site_for_channel(data['channel'])
    except (ValueError, IndexError):
        wikipedia.output('Can\'t determinate the wikifamily of ' + data['channel'] + '. Skipping...')
        # Without a site there is nothing to look up; the original fell
        # through here and crashed on the unbound ``site`` variable.
        return

    page = wikipedia.Page(site, data['title'])
    try:
        content = page.get()
    except wikipedia.IsRedirectPage:
        wikipedia.output('[[' + page.title() + ']] is a redirect page, skipping...')
        return
    except wikipedia.NoPage:
        wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
        return

    # Look at the five most recent revisions first; only fetch the whole
    # history when all five of them belong to the editing user.
    history = page.getVersionHistory(forceReload=True, revCount=5)
    oldversion = _last_version_by_other_user(history, data['user'])
    if not oldversion and len(history) == 5:
        history = page.getVersionHistory(forceReload=True, getAll=True)
        oldversion = _last_version_by_other_user(history, data['user'])

    if not oldversion:
        wikipedia.output('################################### NEW PAGE ###################################\nPage: ' + page.aslink() + '\nUser: ' + data['user'] + '\nContent:\n' + content)
        return

    try:
        oldcontent = page.getOldVersion(oldid=oldversion[0])
    except wikipedia.IsRedirectPage:
        wikipedia.output('[[' + page.title() + ']] was a redirect page on its previous version by ' + oldversion[2] + ', skipping...')
        return
    except wikipedia.NoPage:
        wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
        return

    # %-formatting instead of '+' so that a non-string revision id cannot
    # raise TypeError.  'diff' is absent for the new-page patterns, hence
    # the .get() with a placeholder instead of a KeyError.
    wikipedia.output(
        '################################### NEW EDIT ###################################\nPage: %s\nCurrent version: %s (User: %s)\nOld version: %s (User: %s)\nDiff:'
        % (page.aslink(), data.get('diff', 'unknown'), data['user'],
           oldversion[0], oldversion[2]))
    wikipedia.showDiff(oldcontent, content)


# Patterns for the colour-coded messages of the recent-changes feed, tried in
# order: (1) edit with diff/oldid, (2) new page with rcid URL, (3) new page
# with a /wiki/ URL.  Each pattern expects the trailing '\r\n' of the line.
regexps = [
    re.compile(r':.*? PRIVMSG (?P<channel>.*) :\x0314\[\[\x0307(?P<title>.*?)\x0314\]\]\x034 (?P<flags>.*?)\x0310 \x0302http:\/\/.*?\/w\/index\.php\?title=.*?&diff=(?P<diff>[0-9]*)&oldid=(?P<oldid>[0-9]*)(&rcid=(?P<rcid>[0-9]*))?\x03 \x035\*\x03 \x0303(?P<user>.*?)\x03 \x035\*\x03 \(\x02?(?P<diffsize>[+-][0-9]*)\x02?\) \x0310(?P<comment>.*)\x03\r\n'),
    re.compile(r':.*? PRIVMSG (?P<channel>.*) :\x0314\[\[\x0307(?P<title>.*?)\x0314\]\]\x034 (?P<flags>.*?N)\x0310 \x0302http:\/\/.*?\/w\/index\.php\?title=.*?&rcid=(?P<rcid>[0-9]*)\x03 \x035\*\x03 \x0303(?P<user>.*?)\x03 \x035\*\x03 \(\x02?(?P<diffsize>[+-][0-9]*)\x02?\) \x0310(?P<comment>.*)\x03\r\n'),
    re.compile(r':.*? PRIVMSG (?P<channel>.*) :\x0314\[\[\x0307(?P<title>.*?)\x0314\]\]\x034 (?P<flags>.*?N)\x0310 \x0302http:\/\/.*?\/wiki/.*?\x03 \x035\*\x03 \x0303(?P<user>.*?)\x03 \x035\*\x03 \(\x02?(?P<diffsize>[+-][0-9]*)\x02?\) \x0310(?P<comment>.*)\x03\r\n'),
]


def _identify(rc):
    """Register a randomised nickname on socket *rc* and join ``channels``.

    Returns the nickname that was sent to the server.
    """
    rand = str(random.randint(120, 9999))
    # Keep the nick at 9 characters at most: prefix of ``nickname`` + digits.
    nick = '%s%s' % (nickname[0:9 - len(rand)], rand)
    rc.sendall('NICK %s\r\n' % nick)
    rc.sendall('USER %s %s %s :%s\r\n' % (nick, nick, nick, nick))
    for channel in channels:
        channel = str(channel)
        if channel[0] not in '&#!+':
            channel = '#%s' % channel
        rc.sendall('JOIN %s\r\n' % channel)
    return nick


def _send_quit(rc):
    """Send QUIT on *rc*, ignoring socket errors (the peer may be gone)."""
    try:
        rc.sendall('QUIT\r\n')
    except socket.error:
        pass


def _dispatch(line):
    """Start a check() thread if the raw IRC *line* is a known rc message.

    *line* is one message without its '\\r\\n' terminator; the terminator is
    put back because every pattern in ``regexps`` ends with it.
    """
    text = (line + '\r\n').decode('utf-8', 'replace')
    for pattern in regexps:
        match = pattern.match(text)
        if match:
            thread.start_new_thread(check, (match.groupdict(),))
            break


def _run_connection():
    """Serve one IRC connection until it ends.

    Returns True when the server closed the connection (the caller should
    reconnect) and False when the user interrupted the bot with Ctrl-C.
    The socket is always closed before returning.
    """
    rc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        rc.connect((str(server), int(port)))
        rc.recv(4096)  # wait for, and discard, the server's first output
        nick = _identify(rc)
        pending = ''  # bytes received after the last complete line
        while True:
            chunk = rc.recv(4096)
            if chunk == '':
                # Empty read: the server closed the connection.
                _send_quit(rc)
                return True
            # recv() may return several messages or only part of one, so
            # buffer the stream and only handle complete '\r\n' lines.
            pending += chunk
            lines = pending.split('\r\n')
            pending = lines.pop()
            for line in lines:
                if line.startswith('PING'):
                    # Echo the server's token back: 'PING :x' -> 'PONG :x'.
                    rc.sendall('PONG' + line[4:] + '\r\n')
                elif line.endswith('433 * %s :Nickname is already in use.' % nick):
                    nick = _identify(rc)
                else:
                    _dispatch(line)
    except KeyboardInterrupt:
        _send_quit(rc)
        return False
    finally:
        rc.close()


def rcbot():
    """Run the recent-changes bot, reconnecting whenever the server drops us.

    Returns when the user interrupts the bot with Ctrl-C.
    """
    # A loop rather than the original self-recursion, so that repeated
    # disconnects cannot exhaust the recursion limit.
    while _run_connection():
        pass


if __name__ == '__main__':
    try:
        rcbot()
    finally:
        wikipedia.stopme()
BlogMarks
del.icio.us
digg
Fark
Furl
Newsvine
reddit
Segnalo
Simpy
Slashdot
smarking
Spurl
Wists
