# Python:Bildechecker
#
# From Botwiki
#
# (Redirected from Python:Bildechecker.py)
# Jump to: navigation, search
"""
Bot written for piero_tasso to find images that have no comment (such comments are useful
for blind people, so it helps to know which images need fixing).
 
This script is unofficial and taken from:
 
http://fkregex.sno.cc/
 
Parameters:
 
-start:     Set from what page you want to start (default: !)
 
-page:      Set the page to check (default: use -start)
"""
 
import wikipedia, re
 
# Wikitext image link that has at least one "|"-separated part:
# [[Image:...|...]] or [[Immagine:...|...]] (first letter in either case).
regexp = r'\[\[[Ii](mage|mmagine):(.*?)\|(.*?)\]\]'
# <div class="thumbcaption"> element; main() searches for it in the fetched HTML.
rgxp = '<div class="thumbcaption">(.*?)</div>'
# alt attribute of an <img> tag; main() searches for it in the fetched HTML.
reg = '<img alt="(.*?)"'

r = re.compile(regexp, re.UNICODE)
p = re.compile(rgxp, re.UNICODE)
q = re.compile(reg, re.UNICODE)

# Run-mode state read by the driver at the bottom of the script:
#   start == True  -> crawl all pages, beginning at the title held in `page`
#   page is True   -> check only the single title held in `page_alone`
# The default is a full crawl starting from '!' (as the module docstring says).
start = True
page = '!'
for arg in wikipedia.handleArgs():
    if arg.startswith('-start:'):
        start = True
        # A bare "-start:" carries no title, so ask for one interactively.
        if len(arg) == len('-start:'):
            page = wikipedia.input(u'From what page do you want to start?')
        else:
            page = arg[len('-start:'):]
    elif arg.startswith('-page:'):
        page = True
        start = False
        # A bare "-page:" carries no title, so ask for one interactively.
        if len(arg) == len('-page:'):
            page_alone = wikipedia.input(u'What page do you want to check?')
        else:
            page_alone = arg[len('-page:'):]
 
# Log a page that needs checking on the bot's report page.
def report(page_check):
    """Append ``page_check`` to the log page unless it is already listed.

    The log lives at 'Utente:Tassobot/Log'. Each entry is written as a
    bullet of the form ``*[[title]]``; a title counts as already reported
    only when that exact ``[[title]]`` link is present in the log text.

    page_check -- title of the page to report (without the [[ ]] brackets).
    """
    com = 'Bot: Aggiungo pagina da controllare'
    another_page = wikipedia.Page('it', 'Utente:Tassobot/Log')
    if another_page.exists():
        text_gett = another_page.get()
    else:
        text_gett = ''
    # Compare against the literal link instead of compiling the title as a
    # regular expression: titles may contain regex metacharacters, and a
    # bare substring search would let "Roma" be hidden by "Romania".
    entry = '[[%s]]' % page_check
    if entry in text_gett:
        wikipedia.output(u"The page is already in the report page.")
        return
    # Adding the log entry.
    another_page.put(text_gett + '\n*' + entry, comment=com, minorEdit=True)
    wikipedia.output(u"...Reported...")
 
def main(i):
    """Check one page and report it when its images carry no description.

    i -- either a wikipedia.Page or a string of the form '[[Title]]'.

    Steps:
      1. Load the wikitext and look for an image link (module-level ``r``).
         If there is none, nothing else is done.
      2. Fetch the page's HTML and look for a thumbnail caption (``p``),
         then for an <img alt="..."> attribute (``q``).
      3. If neither is found, hand the title to report().

    Relies on the module-level names ``site``, ``r``, ``p``, ``q`` and
    ``report``. Pages whose text cannot be loaded because they are missing
    or are redirects are skipped with a message.
    """
    wikipedia.output(u'Getting %s...' % i)
    if isinstance(i, wikipedia.Page):
        # Take the title straight from the Page rather than parsing str(i),
        # which goes through a byte-string conversion of the title.
        page_check = i.title()
    else:
        # Strip the surrounding [[ ]] without forcing the text through
        # str(), so non-ASCII titles are not encoded on the way.
        page_check = re.sub(r'\[\[(.*?)\]\]', r'\1', i)
    x = wikipedia.Page(site, page_check)
    try:
        text = x.get()
    except (wikipedia.NoPage, wikipedia.IsRedirectPage):
        wikipedia.output(u'Skipping %s: missing page or redirect...\n' % page_check)
        return
    if r.search(text) is None:
        wikipedia.output(u'No images found...\n')
        return
    wikipedia.output(u'Getting page to check if it has images...')
    # urlname() gives the title in URL-encoded form, so spaces and
    # non-ASCII characters do not end up raw in the request path.
    text_get = site.getUrl('/wiki/' + x.urlname())
    if p.search(text_get) is not None:
        wikipedia.output(u"There is the image's explanation...\n")
    elif q.search(text_get) is not None:
        wikipedia.output(u"There is the image's description but without thumb...\n")
    else:
        report(page_check)
# Script driver: run either a crawl over all pages or a single-page check,
# and always release the framework with stopme() when done.
try:
    site = wikipedia.getSite()
    if start:
        # `page` doubles as a boolean flag in the argument parsing above;
        # when it does not hold a title, fall back to the documented
        # default start page '!'.
        first_title = '!' if isinstance(page, bool) else page
        # NOTE(review): arguments are presumably (start title, namespace,
        # include redirects) -- confirm against the installed framework.
        for i in site.allpages(first_title, 0, False):
            main(i)
    elif page is True:
        main('[[%s]]' % page_alone)
finally:
    # Runs on both normal exit and error, so no second call is needed.
    wikipedia.stopme()
# Personal tools