-
Notifications
You must be signed in to change notification settings - Fork 0
/
milanbot.py
148 lines (123 loc) · 4.36 KB
/
milanbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
#-*- coding: utf-8 -*-
import pywikibot as pwb
import milanbot.util.logger as log
import milanbot.transiteration as tr
from milanbot.config import parser
from milanbot.querier import wd_sparql
# Module-level loggers created through the project's logging helpers.
# `logger` is the one used by main() and by the script entry point below.
logger = log.terminal_logger()
# NOTE(review): `file_logger` is not referenced anywhere else in the code
# visible here -- confirm whether it is still needed.
file_logger = log.file_logger("test.csv")
def main(settings):
    """
    Compose and save Serbian descriptions for the items returned by a query.

    The SPARQL query is read from the file named by ``settings.query``. For
    every resulting item the description is built as
    "<demonym> <occupations>": the demonym is looked up on the item's single
    country of citizenship (P27 -> P1549) and the occupations on P106, both
    in the grammatical gender derived from the item's P21 statement.

    Items are skipped (instead of aborting the run) when P21, P27 or P106 is
    missing, when P21 or P27 does not carry exactly one value, when that
    value is empty, when the gender is not one of the handled ones, or when
    no occupation could be rendered.

    The same text is saved under ``sr`` and ``sr-ec``; ``sr-el`` receives the
    result of ``tr.transliterate``.

    :param settings: parsed settings object; only ``settings.query`` (path of
        the file holding the SPARQL query) is read here
    :return: None
    """
    logger.info("Starting the bot...")

    # Get the query
    with open(settings.query, 'r') as query_file:
        sparql = query_file.read()

    # Retrieve the SPARQL results
    sparql_pages = wd_sparql(query=sparql)

    counter = 1
    for page in sparql_pages:
        instance = page.get(get_redirect=True)
        # The claims live in the dict returned by get(), not on the page
        # object itself.
        claims = instance['claims']

        # Cheap local checks first, so that items which cannot be described
        # are dropped before the country item is fetched.
        if any(prop not in claims for prop in (u'P21', u'P27', u'P106')):
            continue

        sex = _single_target(claims[u'P21'])
        if sex is None:
            continue
        gender = _grammatical_gender(sex.id)
        if gender is None:
            continue

        country = _single_target(claims[u'P27'])
        if country is None:
            continue
        country_claims = country.get()['claims']
        if u'P1549' in country_claims:
            demonyms = country_claims[u'P1549']
        else:
            demonyms = []

        # Determine a nationality for a person
        demonym = extract_demonym(demonyms, gender)

        occupation_stack = extract_occupations(gender, instance)
        if not occupation_stack:
            continue

        description = _compose_description(demonym, occupation_stack)

        descriptions = {
            u'sr': description,
            u'sr-ec': description,
            u'sr-el': tr.transliterate(description),
        }
        page.editDescriptions(
            descriptions=descriptions,
            summary=u'Setting/updating Serbian descriptions: {}.'.format(description))
        print('Added description {count}: {desc}'.format(
            count=counter,
            desc=description
        ))

        counter += 1
        # The counter starts at 1, so the run stops after 999 saved edits.
        if counter == 1000:
            break


def _single_target(statements):
    """Return the target of a one-element statement list, otherwise None."""
    if len(statements) != 1:
        return None
    return statements[0].target


def _grammatical_gender(sex_or_gender_id):
    """Map a P21 value id to the id used for gendered forms, or None."""
    return {
        u'Q6581097': u'Q499327',    # male
        u'Q6581072': u'Q1775415',   # female
        u'Q16334295': u'Q146786',   # group
    }.get(sex_or_gender_id)


def _compose_description(demonym, occupations):
    """Join demonym and occupations as "<demonym> a, b и c"."""
    if len(occupations) > 1:
        listed = u', '.join(occupations[:-1]) + u' и ' + occupations[-1]
    else:
        listed = occupations[0]
    # An empty demonym must not leave a leading space in the description.
    return u' '.join(part for part in (demonym, listed) if part)
def extract_occupations(gender, instance, max_occupations=5):
    """
    Collect the Serbian gendered names of an item's occupations.

    Each P106 statement of ``instance`` is resolved to its occupation item,
    and the first Serbian ('sr') value of that item's gendered-form property
    is appended to the result: P2521 when ``gender`` is u'Q1775415', P3321
    when it is u'Q499327'. Any other ``gender`` yields an empty list.

    Collection stops at the first occupation that has no value or lacks the
    gendered-form property, so the result is a prefix of the occupation
    list. An occupation that has the property but no Serbian value is
    passed over without stopping.
    NOTE(review): stop-versus-skip makes the result depend on statement
    order -- confirm that this is intended.

    :param gender: grammatical gender id, u'Q499327' or u'Q1775415'
    :param instance: item content dict; ``instance['claims'][u'P106']`` must
        exist
    :param max_occupations: upper bound on the number of occupations that
        are inspected (defaults to the previously hard-coded 5)
    :return: list of occupation names in Serbian, possibly empty
    """
    occupations = instance['claims'][u'P106']
    occupation_stack = list()

    gendered_form = {
        u'Q1775415': u'P2521',
        u'Q499327': u'P3321',
    }.get(gender)
    if gendered_form is None:
        # No property to look up, so there is no point in fetching the
        # occupation items at all.
        return occupation_stack

    for index, occupation in enumerate(occupations):
        if index >= max_occupations:
            break
        occupation_item = occupation.target
        if occupation_item is None:
            # Statement without a value: handled like an occupation that
            # lacks a gendered form.
            break
        occupation_claims = occupation_item.get()['claims']
        if gendered_form not in occupation_claims:
            break
        for name in occupation_claims[gendered_form]:
            try:
                target = name.getTarget()
                if target is not None and u'sr' == target.language:
                    occupation_stack.append(target.text)
                    break
            except AttributeError as e:
                print('`target`: {obj} has no object `target`: {err}'.format(
                    obj=name.getID(),
                    err=e
                ))
    return occupation_stack
def extract_demonym(demonyms, gender):
    """
    Search through the list of demonyms and find the right one by gender.

    A demonym statement qualifies when its value is in Serbian ('sr') and
    its single P518 qualifier points at ``gender``. Statements without a
    value, without a P518 qualifier, or with a P518 list that does not hold
    exactly one entry are skipped; an ambiguous statement no longer
    terminates the whole process.

    :param demonyms: iterable of demonym statements (P1549)
    :param gender: may be male (u'Q499327') or female (u'Q1775415')
    :return: demonym in Serbian language, or u'' when none matches
    """
    for demonym in demonyms:
        local_demonym = demonym.getTarget()
        if local_demonym is None or local_demonym.language != u'sr':
            continue
        demonym_qualifiers = demonym.qualifiers
        if 'P518' not in demonym_qualifiers:
            continue
        demonym_gender = demonym_qualifiers['P518']
        if len(demonym_gender) != 1:
            # Ambiguous or empty qualifier list: ignore this statement
            # rather than calling exit() from inside a helper.
            continue
        applies_to = demonym_gender[0].target
        if applies_to is not None and applies_to.id == gender:
            return local_demonym.text
    return u''
# Script entry point: parse the settings with the project's argument parser
# and run the bot.
if __name__ == '__main__':
    settings = parser.parse_args()
    try:
        main(settings)
    except KeyboardInterrupt:
        # A keyboard interrupt is swallowed on purpose so that the run ends
        # without a traceback; the shutdown steps below still execute.
        pass
    finally:
        # Runs on normal completion, on interrupt and on any other error.
        logger.info("Shutting down the bot...")
        # pywikibot shutdown hook -- see the pywikibot docs for its exact
        # effect.
        pwb.stopme()