-
Notifications
You must be signed in to change notification settings - Fork 1
/
wikilink.py
49 lines (43 loc) · 1.33 KB
/
wikilink.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os, sys
try:
    import mwclient
except ImportError:
    # mwclient is a third-party package that may not be installed. The
    # failure is tolerated so this module can still be loaded without it;
    # the name `mwclient` is then simply left undefined, and any code that
    # actually uses it will fail at that point. Only ImportError is
    # swallowed, so unrelated problems (syntax errors inside the package,
    # Ctrl-C during import, ...) are no longer hidden.
    pass
import xml.etree.cElementTree as ET
# Connection settings for the wiki. Each value can be overridden through an
# environment variable (WIKILINK_HOST, WIKILINK_PATH, WIKILINK_USER,
# WIKILINK_PASS); when the variable is unset the historical default is used.
# NOTE(security): the default password is still stored in this file. Prefer
# supplying WIKILINK_PASS from the environment and removing the literal.
HOST = os.environ.get("WIKILINK_HOST", "wiki.lagen.nu")  # wiki host name
PATH = os.environ.get("WIKILINK_PATH", "/w/")  # script path on that host
USER = os.environ.get("WIKILINK_USER", "staffan")  # login user name
PASS = os.environ.get("WIKILINK_PASS", "meneta")  # login password
#def import_page(title,wikimarkup,site):
# page = site.Pages[title]
# try:
# page.save(wikimarkup, summary="Offline-redigering")
# print "Saved %s" % title
# except mwclient.errors.EditError:
# print "Couldn't save '%s'" % title
#
#def extract_page(filename):
# tree = ET.parse(filename)
# title = tree.find("//{http://www.mediawiki.org/xml/export-0.3/}title").text
# wikimarkup = tree.find("//{http://www.mediawiki.org/xml/export-0.3/}text").text
# return (title,wikimarkup)
#
#
#def process_files(dir, site):
# for f in os.listdir(dir):
# filename = "%s%s%s" % (dir,os.path.sep,f)
# (title, wikimarkup) = extract_page(filename)
# import_page(title,wikimarkup,site)
def resolve_keywords(pages):
    """Map every page name to the page name it stands for.

    Args:
        pages: iterable of page objects exposing a ``name`` attribute, a
            truthy/falsy ``redirect`` attribute and a ``links()`` method
            that yields objects with a ``name`` attribute.

    Returns:
        A dict keyed by page name. A non-redirect page maps to its own
        name. A redirect page maps to the name of the first object yielded
        by ``links()`` (presumably the redirect target -- confirm against
        the mwclient version in use). A redirect without any links maps to
        its own name instead of raising ``IndexError``.
    """
    keywords = {}
    for page in pages:
        target = page.name
        if page.redirect:
            # Only the first link is needed, so do not materialise them all.
            first_link = next(iter(page.links()), None)
            if first_link is not None:
                target = first_link.name
        keywords[page.name] = target
    return keywords


if __name__ == "__main__":
    # The mwclient import at the top of the file is allowed to fail; give a
    # readable message here instead of a NameError traceback.
    if "mwclient" not in globals():
        sys.exit("wikilink.py requires the mwclient package")

    site = mwclient.Site(HOST, PATH)
    site.login(USER, PASS)

    # get a list of all pages
    pages = list(site.Pages)
    print(len(pages))

    keywords = resolve_keywords(pages)

    # `keywords` holds one entry per page (redirects included), so its size
    # cannot serve as the "real pages" figure; count non-redirects instead.
    real_pages = sum(1 for page in pages if not page.redirect)
    print("%s pages, %s real pages" % (len(pages), real_pages))