Purple exclamation mark.svg Planning the future of Botwiki! - Help us bring Botwiki up to date, contribute to our strategy discussion, add bot scripts, and contribute manuals, guides, and tutorials! Almost anything related to bots, particularly those used to edit mediawiki, is welcome.

Red exclamation mark.svg UNABLE TO EDIT? - We've experienced attacks by spambots lately and now require you to confirm your e-mail before you can edit (go to your preferences, enter an e-mail address, and request a confirmation e-mail, then go to your e-mail and click on the confirmation link). We also require new accounts to make a few edits and wait a few minutes before you can create a page; however, if this is a problem, contact us in #botwiki and we can manually confirm your account. Sorry for the inconvenience.

Python:Newpagespatrol.py

From Botwiki
Jump to: navigation, search
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
* list=recentchanges (rc) *
  Enumerate recent changes
 
This module requires read rights.
Parameters:
  rcstart        - The timestamp to start enumerating from.
  rcend          - The timestamp to end enumerating.
  rcdir          - In which direction to enumerate.
                   One value: newer, older
                   Default: older
  rcnamespace    - Filter log entries to only this namespace(s)
                   Values (separate with '|'): 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 101, 102, 103
  rcprop         - Include additional pieces of information
                   Values (separate with '|'): user, comment, flags, timestamp, title, ids, sizes, redirect, patrolled, loginfo
                   Default: title|timestamp|ids
  rctoken        - Which tokens to obtain for each change
                   Values (separate with '|'): patrol
  rcshow         - Show only items that meet this criteria.
                   For example, to see only minor edits done by logged-in users, set show=minor|!anon
                   Values (separate with '|'): minor, !minor, bot, !bot, anon, !anon, redirect, !redirect, patrolled, !patrolled
  rclimit        - How many total changes to return.
                   No more than 500 (5000 for bots) allowed.
                   Default: 10
  rctype         - Which types of changes to show.
                   Values (separate with '|'): edit, new, log
 
"""
 
#
# (C) Filnik, 2009
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: $'
#
 
import re, query
import wikipedia, datetime
 
def findExistanceVariable(array, name):
    """
    Look up the key `name` in the mapping `array`.

    Returns:
      False                 - the key is not present;
      True                  - the key is present and its value is the empty string;
      the stored value      - in every other case.
    """
    try:
        found = array[name]
    except KeyError:
        # Missing key: report it as "not set".
        return False
    # An empty string means the key is there but carries no payload.
    return True if found == '' else found
 
def recentchanges(rctype = None, rcnamespace = None, rclimit = 5):
    #action=query&list=recentchanges&rcprop=user|comment|flags|timestamp|title|ids|sizes|redirect|patrolled|loginfo
    """
    Generator over the wiki's recent changes, fetched through the API.

    Parameters:
      rctype       - value for the API's 'rctype' filter; not sent when falsy.
      rcnamespace  - value for the API's 'rcnamespace' filter; not sent when falsy.
      rclimit      - how many changes to ask for (sent as a string).

    Yields each entry of data['query']['recentchanges'] in the order the
    API returned them.
    """

    request = {
        'action'    :'query',
        'list'      :'recentchanges',
        'rcprop'    :'user|comment|flags|timestamp|title|ids|sizes|redirect|patrolled|loginfo',
        'rclimit'   :str(rclimit),
        }

    # Optional filters are only added to the request when a truthy value was given.
    for key, value in (('rctype', rctype), ('rcnamespace', rcnamespace)):
        if value:
            request[key] = value

    response = query.GetData(request, useAPI = True, encodeTitle = False)
    for change in response['query']['recentchanges']:
        yield change
 
def userInfo(user):
    #action=query&list=users&ususers=Filnik|Gatto Nero&usprop=blockinfo|groups|editcount|registration
    """
    Ask the API for general information about a user
    (block info, groups, edit count, registration).

    `user` is passed through unchanged as the 'ususers' parameter, so several
    names may be separated with "|"; note, however, that only the first entry
    of the returned user list is handed back to the caller.
    """

    request = dict(
        action='query',
        list='users',
        usprop='blockinfo|groups|editcount|registration',
        ususers=user,
    )

    response = query.GetData(request, useAPI = True, encodeTitle = False)
    users = response['query']['users']
    return users[0]
 
class newPagesPatrol():
    """
    Walk the newly created pages of namespace 0 and print diagnostic data
    about each page and its author.

    start() iterates over recentchanges(rctype='new', rcnamespace='0'),
    copies the fields of every log entry onto the instance and then calls
    main() for that entry.
    """

    def __init__(self):
        # Let the framework consume its global command line arguments.
        self.args = wikipedia.handleArgs()
        self.site = wikipedia.getSite()

    def start(self):
        """
        Load the recent 'new page' entries and process them one by one.

        Every field of the current log entry is stored as an attribute
        (numeric ones converted to strings) before main() is invoked.
        """
        for self.logline in recentchanges(rctype = 'new', rcnamespace = '0'):
            self.comment = self.logline["comment"]
            self.newlen = str(self.logline["newlen"])
            self.rcid = str(self.logline["rcid"])
            self.pageid = str(self.logline["pageid"])
            self.title = self.logline["title"]
            self.timestamp = self.logline["timestamp"]
            self.revid = str(self.logline["revid"])
            self.old_revid = str(self.logline["old_revid"])
            self.user = self.logline["user"]
            self.oldlen = str(self.logline["oldlen"])
            self.ns = str(self.logline["ns"])
            self.typeLog = self.logline["type"]
            # Optional keys: False when absent, True when present but empty,
            # otherwise the value itself (see findExistanceVariable).
            self.minor = findExistanceVariable(self.logline, "minor")
            self.redirect = findExistanceVariable(self.logline, "redirect")
            self.patrolled = findExistanceVariable(self.logline, "patrolled")
            self.logtype = findExistanceVariable(self.logline, "logtype")
            self.bot = findExistanceVariable(self.logline, "bot")
            self.main()

    def main(self):
        """
        Process the log entry currently stored on the instance.

        Fetches the page text and the author's user info, then prints the
        collected data and the ratio of '[' characters to the text length.

        Returns False when the page was deleted or is a redirect (the entry
        is skipped); otherwise returns None.
        """
        self.wikiPage = wikipedia.Page(self.site, self.title)
        try:
            self.pageText = self.wikiPage.get()
        except wikipedia.NoPage:
            wikipedia.output(u'%s has been deleted. Skip.' % self.title)
            return False
        except wikipedia.IsRedirectPage:
            wikipedia.output(u'%s is a redirect. Skip.' % self.title)
            return False

        self.userInfoFound = userInfo(self.user)
        # An "invalid" key in the user entry is taken to mean the author is
        # an anonymous (IP) user -- presumably how the API reports them.
        self.userIsAnIP = findExistanceVariable(self.userInfoFound, "invalid")
        if not self.userIsAnIP:
            self.editcount = findExistanceVariable(self.userInfoFound, "editcount")
            self.groups = findExistanceVariable(self.userInfoFound, "groups")
            if self.groups:
                # Keep only the first group. The original code read the
                # undefined name `groups` here, which raised NameError.
                self.groups = self.groups[0]
            self.registration = findExistanceVariable(self.userInfoFound, "registration")
            self.blockedby = findExistanceVariable(self.userInfoFound, "blockedby")
        else:
            # No account data is looked up for IP users.
            self.editcount = False
            self.groups = False
            self.registration = False
            self.blockedby = False

        # Single-argument print(...) produces the same output under Python 2
        # and is also valid Python 3 syntax.
        print([self.userIsAnIP, self.editcount, self.groups, self.registration, self.blockedby])
        print([self.title, self.comment, self.newlen, self.rcid, self.pageid, self.timestamp,
               self.revid, self.old_revid, self.user, self.patrolled, self.oldlen, self.ns,
               self.typeLog, self.minor, self.redirect, self.patrolled, self.logtype, self.bot])

        # Share of '[' characters in the page text. An empty page would
        # otherwise cause a ZeroDivisionError, so report 0.0 for it.
        textLength = len(self.pageText)
        if textLength:
            bracketRatio = float(self.pageText.count('[')) / float(textLength)
        else:
            bracketRatio = 0.0
        print(bracketRatio)

        # Presumably sample ratios observed on earlier runs:
        #0.0168441643121
        #0.0117899249732
 
if __name__ == "__main__":
    # Start of the run, in UTC, truncated to whole seconds.
    old = datetime.datetime.utcnow().replace(microsecond=0)
    try:
        classInit = newPagesPatrol()
        classInit.start()
    finally:
        # Always report the elapsed time and shut the framework down,
        # even when the patrol run raised an exception.
        final = datetime.datetime.utcnow().replace(microsecond=0)
        delta = final - old
        # NOTE: timedelta.seconds is only the seconds component of the
        # difference; whole days are not included.
        secs_of_diff = delta.seconds
        wikipedia.output("Execution time: %s" % secs_of_diff)
        wikipedia.stopme()
Personal tools
Share