-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathdemo_queries.py
105 lines (86 loc) · 3.2 KB
/
demo_queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
"""
Query demonstrations
====================
This script demonstrates interesting uses of the `mwapi` library to log in
and query English Wikipedia's MediaWiki API.  Here's the basic flow:
0. Create session with user-agent
1. Log in
2. Confirm logged-in status
3. Query for the last 5 revisions of User_talk:EpochFail
4. Query for these revisions by their revision ID
5. Cause the API to throw an error and catch it.
"""
import sys
from itertools import islice
import mwapi
import mwapi.cli
import mwapi.errors
# Session setup: identify this script with a descriptive user-agent, build
# a formatversion=2 session against English Wikipedia, then log in
# interactively via the mwapi CLI helper.
my_agent = 'mwapi demo script <[email protected]>'
wiki_url = 'https://en.wikipedia.org'
session = mwapi.Session(wiki_url, formatversion=2, user_agent=my_agent)
mwapi.cli.do_login(session, wiki_url)

# Confirm logged-in status by asking the API who we are.
print("whoami?")
print("\t", session.get(action='query', meta='userinfo'), "\n")
def query_revisions_by_revids(revids, batch=50, **params):
    """
    Generate revision documents for the given revision IDs, requesting
    them from the API in batches.

    :param revids: an iterable of revision IDs to look up
    :param batch: how many IDs to send per API request (default 50)
    :param params: extra query parameters forwarded to the API call

    Each yielded revision doc carries its page's metadata (everything on
    the page doc except 'revisions') under the 'page' key.

    NOTE: relies on the module-level `session` being logged in.
    """
    id_iterator = iter(revids)
    batch_ids = list(islice(id_iterator, 0, batch))
    while batch_ids:
        doc = session.post(action='query', prop='revisions',
                           revids=batch_ids, **params)
        for page_doc in doc['query']['pages']:
            # Everything except the revision list is page-level metadata.
            page_meta = {key: value for key, value in page_doc.items()
                         if key != 'revisions'}
            for revision_doc in page_doc.get('revisions', ()):
                revision_doc['page'] = page_meta
                yield revision_doc
        batch_ids = list(islice(id_iterator, 0, batch))
def query_revisions(title=None, pageid=None, batch=50, limit=50,
                    **params):
    """
    Generate up to `limit` revision documents for a single page,
    following API continuation as needed.

    :param title: page title to query (one of `title`/`pageid` required)
    :param pageid: page ID to query (one of `title`/`pageid` required)
    :param batch: maximum revisions requested per API call (rvlimit)
    :param limit: maximum number of revision docs to yield in total
    :param params: extra query parameters forwarded to the API call
    :raises TypeError: if neither `title` nor `pageid` is given
                       (raised on first iteration, since this is a
                       generator)

    Each yielded revision doc carries its page's metadata (everything on
    the page doc except 'revisions') under the 'page' key.

    NOTE: relies on the module-level `session` being logged in.
    """
    if title is None and pageid is None:
        raise TypeError("query_revisions requires 'title' or 'pageid'")
    params.update({
        'titles': title,
        'pageids': pageid
    })
    yielded = 0
    response_docs = session.post(action='query', prop='revisions',
                                 rvlimit=min(batch, limit),
                                 continuation=True,
                                 **params)
    for doc in response_docs:
        for page_doc in doc['query']['pages']:
            page_meta = {k: v for k, v in page_doc.items() if k != 'revisions'}
            if 'revisions' in page_doc:
                for revision_doc in page_doc['revisions']:
                    revision_doc['page'] = page_meta
                    yield revision_doc
                    yielded += 1
                    # In a generator, `return` ends iteration immediately;
                    # this replaces the original cascade of three repeated
                    # `if yielded >= limit: break` checks.
                    if yielded >= limit:
                        return
# --- Demo 1: fetch revision IDs for a page by its title -----------------
print("Querying by title")
rev_ids = []
sys.stdout.write("\t ")
revision_docs = query_revisions(title="User_talk:EpochFail", rvprop="ids",
                                limit=70)
for revision_doc in revision_docs:
    # Print one dot per revision as a lightweight progress indicator.
    sys.stdout.write(".")
    sys.stdout.flush()
    rev_ids.append(revision_doc['revid'])
sys.stdout.write("\n\n")

# --- Demo 2: look those same revisions up again by revision ID ----------
print("Querying by rev_id")
for revision_doc in query_revisions_by_revids(rev_ids):
    print("\t", revision_doc['page'], revision_doc['revid'],
          revision_doc['comment'])
print("")

# --- Demo 3: provoke an API error and catch it --------------------------
print("Query with an error")
try:
    session.get(action="query", prop="revisions", revids=[123523], rvlimit=2)
except mwapi.errors.APIError as e:
    print("\t", "An APIError was caught.")
    print("\t", e)
print("")