#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot goes over multiple pages of the home wiki, searches for links, and
shows ko-language interwiki.
These command line parameters can be used to specify which pages to work on:
&params;
-xml Retrieve information from a local XML dump (pages-articles
or pages-meta-current, see http://download.wikimedia.org).
Argument can also be given as "-xml:filename".
-namespace:n Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g.
want to iterate over all categories starting at M, use
-start:Category:M.
All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
"""
__version__='$Id: withouinterlink_ko.py 4251 2007-09-12 10:36:03Z ... $'
import wikipedia, pagegenerators, catlib
import re, sys
class ShowWithoutInterwikiBot:
    """Print one wiki-list line per page: the page link, then its categories.

    The bot walks the iterable handed to the constructor and, for every
    item, emits a line of the form ``* <page link>  <category link> ...``
    through ``wikipedia.output``.
    """

    def __init__(self, generator):
        """Store the iterable of pages that run() will walk.

        generator -- any iterable; each item is passed to treat(), which
                     calls ``aslink()`` and ``categories()`` on it.
        """
        # The previous version also looked up the site's linktrail here but
        # never used the result, so that lookup has been dropped.
        self.generator = generator

    def treat(self, page):
        """Write the list line for *page* with ``toStdout=True``.

        Any exception raised by the page object or by the output call
        propagates to the caller (run() catches it).
        """
        parts = ["* ", page.aslink(), " "]
        # Each category link is preceded by a single space, so the first
        # one ends up two spaces after the page link.
        parts.extend(" " + category.aslink() for category in page.categories())
        wikipedia.output("".join(parts), toStdout=True)

    def run(self):
        """Treat every page from the generator, then call wikipedia.stopme().

        A page whose treatment raises is reported with "stop!ex" and
        skipped. Only ``Exception`` subclasses are swallowed, so Ctrl-C
        (KeyboardInterrupt) and SystemExit still stop the bot.
        """
        for page in self.generator:
            wikipedia.output(u"111")
            try:
                self.treat(page)
            except Exception:
                wikipedia.output(u"stop!ex")
        # NOTE(review): indentation was lost in the reviewed source, so the
        # nesting level of this call is an assumption (once, after the loop)
        # -- confirm against the original file.
        wikipedia.stopme()
def keytitle(x):
    """Return a byte-string sort key ordering titles by UTF-16 code unit.

    x -- a text (unicode) title.

    The big-endian codec is used on purpose: the plain 'utf-16' codec
    prepends a byte-order mark and encodes in the platform's native byte
    order, so on little-endian machines the low byte of every character
    would be compared first and titles would sort in a scrambled,
    platform-dependent order.
    """
    return x.encode('utf-16-be')
class PageT:
    """Collect page titles from a fixed wiki list page and report on each.

    prepare() scrapes the titles, go() prints one line per title, and
    withoutinterwiki_ko() exposes the same titles as a page generator.
    """

    def __init__(self):
        """Start with no titles collected and the generator cursor at 0."""
        # Titles gathered by prepare(), kept sorted with keytitle().
        self.lLink = []
        # Index into lLink of the next title withoutinterwiki_ko() yields.
        self.iIndex = 0

    def prepare(self):
        """Fetch the list page and add every distinct link title to lLink.

        The HTML returned by the site for the hard-coded path is scanned
        for ``<a href=... title=...>`` anchors; each title is echoed via
        ``wikipedia.output`` as it is found, duplicates are dropped, and
        lLink is re-sorted with keytitle() afterwards.
        """
        path = ("/wiki/%EC%82%AC%EC%9A%A9%EC%9E%90:ChongDae/"
                "%EC%9D%B8%ED%84%B0%EC%9C%84%ED%82%A4%EC%96%B8%EC%96%B4%EC%88%98%EB%A6%AC")
        html = wikipedia.getSite().getUrl(path)
        entryR = re.compile('<a href=".+?" title="(?P<title>.+?)">.+?</a>')
        seen = set()
        for match in entryR.finditer(html):
            title = match.group('title')
            wikipedia.output(title)
            # A set ignores repeats, so no membership test is needed.
            seen.add(title)
        self.lLink.extend(seen)
        self.lLink.sort(key=keytitle)

    def withoutinterwiki_ko(self):
        """Yield a Page for every title from the current cursor onwards.

        The cursor (iIndex) is advanced before each page is handed out, so
        a later call resumes after the last page that was produced. The
        previous version yielded a single page and raised IndexError once
        the list was exhausted or empty; iteration now ends cleanly.
        """
        while self.iIndex < len(self.lLink):
            page = wikipedia.Page(wikipedia.getSite(), self.lLink[self.iIndex])
            self.iIndex += 1
            yield page

    def _entry_line(self, page):
        """Build "* <page link>  <category links...>[ {{disambig}}]"."""
        parts = ["* ", page.aslink(), " "]
        parts.extend(" " + category.aslink() for category in page.categories())
        if page.isDisambig():
            parts.append(" " + "{{disambig}}")
        return "".join(parts)

    def go(self):
        """Print the entry line for every collected title.

        Each line is sent to ``wikipedia.output`` twice: once with default
        arguments and once with ``toStdout=True``. A title whose
        processing raises is reported with "exception! <title>" and
        skipped. Only ``Exception`` subclasses are swallowed, so Ctrl-C
        and SystemExit still abort the run.
        """
        for link1 in self.lLink:
            try:
                wikipedia.get_throttle()
                page = wikipedia.Page(wikipedia.getSite(), link1)
                ostring = self._entry_line(page)
                wikipedia.output(ostring)
                wikipedia.output(ostring, toStdout=True)
            except Exception:
                wikipedia.output("exception! %s" % link1)
def main():
    """Collect the titles from the list page, then print a line for each."""
    collector = PageT()
    collector.prepare()
    collector.go()
if __name__ == "__main__":
    # Script entry point: wikipedia.stopme() must run whether main()
    # returns normally or raises, hence the try/finally.
    try:
        main()
    finally:
        wikipedia.stopme()
#page = Page(self, title)
#yield page