사용자:풀빵/스크립트작업실/withoutinterwiki quick.py

#!/usr/bin/python
# -*- coding: utf-8  -*-

"""
This bot goes over multiple pages of the home wiki, searches for links, and
lists pages that have no ko-language interwiki.

These command line parameters can be used to specify which pages to work on:

&params;

    -xml           Retrieve information from a local XML dump (pages-articles
                   or pages-meta-current, see http://download.wikimedia.org).
                   Argument can also be given as "-xml:filename".

    -namespace:n   Number of namespace to process. The parameter can be used
                   multiple times. It works in combination with all other
                   parameters, except for the -start parameter. If you e.g.
                   want to iterate over all categories starting at M, use
                   -start:Category:M.

All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
"""

__version__ = '$Id: withouinterlink_ko.py 4251 2007-09-12 10:36:03Z ... $'

import wikipedia, pagegenerators, catlib
import re, sys


class ShowWithoutInterwikiBot:

	def __init__(self, generator):
		self.generator = generator
		# Link trail of the home wiki, kept on the instance for possible reuse.
		self.linktrail = wikipedia.getSite().linktrail()

	def treat(self, page):
		# Build one report line: the page as a wiki link, followed by all of
		# its categories as wiki links.
		ostring = "* "
		ostring += page.aslink()
		ostring += " "
		lCategories = page.categories()

		for cat1 in lCategories:
			ostring += " " + cat1.aslink()

		wikipedia.output(ostring, toStdout=True)
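		# A line produced here looks roughly like this (illustrative titles,
		# not taken from the wiki):
		#   * [[Example article]]  [[분류:Example category]]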

	def run(self):
		for page in self.generator:
			try:
				self.treat(page)
			except Exception:
				wikipedia.output(u"exception while treating %s, skipping" % page.title())
				continue
		wikipedia.stopme()
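
# ShowWithoutInterwikiBot is not wired into main() below.  A minimal sketch of
# how it could be driven with the standard pywikipedia page generators (this
# wiring is an assumption, not part of the original script):
#
#   gen = pagegenerators.AllpagesPageGenerator(namespace=0)
#   bot = ShowWithoutInterwikiBot(pagegenerators.PreloadingGenerator(gen))
#   bot.run()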

def keytitle(x):
	# Sort key: compare titles by their UTF-16 byte representation.
	return x.encode('utf-16')
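
# For illustration only (assuming a little-endian Python 2 build, where the
# UTF-16 BOM '\xff\xfe' prefixes every key identically):
#   >>> keytitle(u'가')
#   '\xff\xfe\x00\xac'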


	

class PageT:
	def __init__(self):
		# Titles collected from the source list page, sorted by keytitle().
		self.lLink = []
		# Cursor used by the withoutinterwiki_ko() generator below.
		self.iIndex = 0

	def prepare(self):
		# Fetch the source list page and collect every linked title exactly once.
		seen = set()
		path = "/wiki/%EC%82%AC%EC%9A%A9%EC%9E%90:ChongDae/%EC%9D%B8%ED%84%B0%EC%9C%84%ED%82%A4%EC%96%B8%EC%96%B4%EC%88%98%EB%A6%AC"
		html = wikipedia.getSite().getUrl(path)
		entryR = re.compile('<a href=".+?" title="(?P<title>.+?)">.+?</a>')
		for m in entryR.finditer(html):
			title = m.group('title')
			wikipedia.output(title)
			if title not in seen:
				seen.add(title)
		self.lLink = sorted(seen, key=keytitle)
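		# The entryR pattern above assumes the list page renders each entry as
		# a plain anchor, e.g. (illustrative markup only):
		#   <a href="/wiki/Foo" title="Foo">Foo</a>
		# and only the title attribute is kept.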
	
	def withoutinterwiki_ko(self):
		# Page generator over the collected titles (not used by main() below).
		while self.iIndex < len(self.lLink):
			page = wikipedia.Page(wikipedia.getSite(), self.lLink[self.iIndex])
			self.iIndex += 1
			yield page

	def go(self):
		# Report every collected page together with its categories.
		for link1 in self.lLink:
			try:
				wikipedia.get_throttle()
				page = wikipedia.Page(wikipedia.getSite(), link1)

				ostring = "* "
				ostring += page.aslink()
				ostring += " "
				lCategories = page.categories()

				for cat1 in lCategories:
					ostring += " " + cat1.aslink()

				# Mark disambiguation pages so they can be filtered out later.
				if page.isDisambig():
					ostring += " {{disambig}}"

				# Echo the line to the terminal and also write it plainly to stdout.
				wikipedia.output(ostring)
				wikipedia.output(ostring, toStdout=True)
			except Exception:
				wikipedia.output(u"exception! %s" % link1)
				continue
		

def main():
	# Collect the titles from the list page, then print the report.
	paget = PageT()
	paget.prepare()

	paget.go()


if __name__ == "__main__":
	try:
		main()
	finally:
		wikipedia.stopme()