Python:Diffbot.py

From Botwiki

Jump to: navigation, search

This is a simple bot that gets recent changes from a wiki and prints the diff between the current version and the last version by a different user. If the page has just been created, or there are no other users in its history, the bot prints the page content. Of course, it has little utility on its own, because there are many more sophisticated bots that print the diffs of all changes on a wiki, but it is a good starting point for building more sophisticated automatic bots: simply check the differences between the contents and add page.put(oldcontent, 'BOT reverting changes.') if the edit seems to be vandalism.

NOTE: If you have an old version of pywikipedia, you will need to upgrade it before using this bot. The function getOldVersion() was added recently.

######## IRC CONFIGURATION ########
# Host name of the IRC server that rcbot() connects to.
server = 'irc.wikimedia.org'
# TCP port of that server.
port = 6667
# Base nickname; rcbot() truncates it and appends a random number before registering.
nickname = 'rc'
# Channels to join; rcbot() prepends '#' to any name that does not already
# start with one of the characters '&#!+'.
channels = ['en.wikipedia', 'en.wiktionary', 'meta.wikimedia']
###################################
 
import wikipedia
def check(data):
	"""Print the diff (or, for a new page, the full text) of one recent change.

	data is a dictionary such as the groupdict() of a match from regexps.
	The entries read here are 'channel', 'title', 'user' and, when an older
	revision by another user exists, 'diff'.

	The page is compared against the first revision in its history whose
	author differs from data['user']. When no such revision exists the whole
	page text is printed instead. Nothing is returned; all results go through
	wikipedia.output() / wikipedia.showDiff().
	"""
	channel = data['channel']
	# Channels whose name does not follow the "#<code>.<family>" scheme; for
	# these the same name is passed as both arguments of getSite().
	special_sites = {
		'#meta.wikimedia': 'meta',
		'#commons.wikimedia': 'commons',
		'#incubator.wikimedia': 'incubator',
		'#species.wikipedia': 'species',
		'#mediawiki.wikipedia': 'mediawiki',
	}
	if channel in special_sites:
		name = special_sites[channel]
		site = wikipedia.getSite(name, name)
	else:
		try:
			site = wikipedia.getSite(channel[1:].split('.')[0], channel.split('.')[1])
		except (ValueError, IndexError):
			wikipedia.output('Can\'t determinate the wikifamily of ' + channel + '. Skipping...')
			# Without a site there is nothing to look up; falling through
			# would fail on the unbound name `site`.
			return
	page = wikipedia.Page(site, data['title'])
	try:
		content = page.get()
	except wikipedia.IsRedirectPage:
		wikipedia.output('[[' + page.title() + ']] is a redirect page, skipping...')
		return
	except wikipedia.NoPage:
		wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
		return

	def first_by_other_user(history):
		# Each history entry is indexed as [0] = revision id, [2] = user name.
		for version in history:
			if version[2] != data['user']:
				return version
		return None

	# Look at the five most recent revisions first; only fetch the complete
	# history when all five were made by the editing user.
	history = page.getVersionHistory(forceReload = True, revCount = 5)
	oldversion = first_by_other_user(history)
	if not oldversion and len(history) == 5:
		history = page.getVersionHistory(forceReload = True, getAll = True)
		oldversion = first_by_other_user(history)

	if oldversion:
		try:
			oldcontent = page.getOldVersion(oldid=oldversion[0])
		except wikipedia.IsRedirectPage:
			wikipedia.output('[[' + page.title() + ']] was a redirect page on its previous version by ' + oldversion[2] + ', skipping...')
			return
		except wikipedia.NoPage:
			wikipedia.output('[[' + page.title() + ']] not exists; maybe an IRC error, skipping...')
			return
		# NOTE(review): data['diff'] is only present when the matching pattern
		# captured a "diff" group -- confirm callers always provide it here.
		wikipedia.output('################################### NEW EDIT ###################################\nPage: ' + page.aslink() + '\nCurrent version: ' + data['diff'] + ' (User: ' + data['user'] + ')\nOld version: ' + oldversion[0] + ' (User: ' + oldversion[2] + ')\nDiff:')
		wikipedia.showDiff(oldcontent, content)
	else:
		wikipedia.output('################################### NEW PAGE ###################################\nPage: ' + page.aslink() + '\nUser: ' + data['user'] + '\nContent:\n' + content)
 
import re
# Patterns for the messages announced on the recent-changes IRC channels.
# All three share the same beginning (sender, channel, page title) and the
# same ending (user, size delta, comment); they differ only in the flags
# group and in the shape of the URL.
_RC_PREFIX = r':.*? PRIVMSG (?P<channel>.*) :\x0314\[\[\x0307(?P<title>.*?)\x0314\]\]\x034 '
_RC_URL = r'\x0310 \x0302http:\/\/.*?'
_RC_SUFFIX = r'\x03 \x035\*\x03 \x0303(?P<user>.*?)\x03 \x035\*\x03 \(\x02?(?P<diffsize>[+-][0-9]*)\x02?\) \x0310(?P<comment>.*)\x03\r\n'

regexps = [
	# URL of the form /w/index.php?title=...&diff=...&oldid=...[&rcid=...]
	re.compile(
		_RC_PREFIX
		+ r'(?P<flags>.*?)'
		+ _RC_URL
		+ r'\/w\/index\.php\?title=.*?&diff=(?P<diff>[0-9]*)&oldid=(?P<oldid>[0-9]*)(&rcid=(?P<rcid>[0-9]*))?'
		+ _RC_SUFFIX
	),
	# Flags ending in "N", URL of the form /w/index.php?title=...&rcid=...
	re.compile(
		_RC_PREFIX
		+ r'(?P<flags>.*?N)'
		+ _RC_URL
		+ r'\/w\/index\.php\?title=.*?&rcid=(?P<rcid>[0-9]*)'
		+ _RC_SUFFIX
	),
	# Flags ending in "N", URL of the form /wiki/...
	re.compile(
		_RC_PREFIX
		+ r'(?P<flags>.*?N)'
		+ _RC_URL
		+ r'\/wiki/.*?'
		+ _RC_SUFFIX
	),
]
 
import socket, random, thread
def rcbot():
	"""Follow the configured IRC channels and pass every recognised edit to check().

	Connects to server:port, registers under a randomised nick, joins every
	entry of channels and then reads the stream line by line. Each line that
	matches one of regexps is handed to check() in a new thread.

	When the server closes the connection a fresh connection is opened.
	KeyboardInterrupt while reading sends QUIT and returns. Errors raised by
	the socket calls are not caught here; the socket is closed in every case.
	"""
	def identify(conn):
		"""Register on conn, join every configured channel and return the nick used."""
		rand = str(random.randint(120, 9999))
		# Truncate the base name so that name + number is at most 9 characters.
		nick = '%s%s' %(nickname[0:9-len(rand)], rand)
		conn.send('NICK %s\r\n' %nick)
		conn.send('USER %s %s %s :%s\r\n' %(nick, nick, nick, nick))
		for channel in channels:
			if not str(channel)[0] in '&#!+':
				channel = '#%s' %channel
			conn.send('JOIN %s\r\n' %channel)
		return nick

	# One iteration per connection. Looping (instead of calling rcbot() again)
	# keeps the stack flat however many times the server drops the link.
	while True:
		rc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		try:
			rc.connect((str(server), int(port)))
			# Read and discard whatever the server sends first.
			rc.recv(4096)
			nick = identify(rc)
			# Bytes received but not yet terminated by '\r\n'. recv() returns
			# arbitrary chunks of the stream, so one chunk may hold several
			# messages, or only part of one.
			pending = ''
			try:
				while True:
					chunk = rc.recv(4096)
					if chunk == '':
						# Empty read: the server closed the connection.
						try:
							rc.send('QUIT\r\n')
						except socket.error:
							pass
						break
					pending += chunk
					lines = pending.split('\r\n')
					# The last element is the (possibly empty) unfinished line.
					pending = lines.pop()
					for line in lines:
						if line.startswith('PING'):
							# Echo the server's token back. Testing the start of
							# the line keeps edit comments that merely contain
							# "PING" from triggering a reply.
							rc.send('PONG' + line[4:] + '\r\n')
						elif line.endswith('433 * %s :Nickname is already in use.' %nick):
							nick = identify(rc)
						else:
							# The patterns expect the trailing '\r\n' that the
							# split removed. Decoding a complete line avoids
							# cutting a multi-byte character in half.
							text = (line + '\r\n').decode('utf-8', 'replace')
							for r in regexps:
								m = r.match(text)
								if m:
									thread.start_new_thread(check, (m.groupdict(),))
									break
			except KeyboardInterrupt:
				try:
					rc.send('QUIT\r\n')
				except socket.error:
					pass
				return
		finally:
			# Release the descriptor before reconnecting or leaving.
			rc.close()
 
# Start the bot only when this file is run as a script.
if __name__ == '__main__':
	try:
		rcbot()
	finally:
		# Call wikipedia.stopme() on the way out, whether rcbot() returned
		# normally or raised.
		wikipedia.stopme()
Personal tools