-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmatch.py
41 lines (34 loc) · 1.37 KB
/
match.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import xml.etree.ElementTree as ET
from collections import defaultdict
def topic_extractor(isEven=False, path='topics-rnd5.xml'):
    """Load topics from an XML file, keeping only even- or odd-numbered ones.

    Each child element of the XML root is treated as one topic. Its
    ``number`` attribute is parsed as an integer, and the text of its first
    three child elements is stored under the keys ``query``, ``question``
    and ``narrative`` respectively (positional lookup; presumably these are
    the <query>, <question> and <narrative> tags -- verify against the file).

    Args:
        isEven: When truthy, return topics whose number is even; otherwise
            return topics whose number is odd.
        path: Location of the topics XML file. Defaults to the file name the
            function has always read, relative to the working directory.

    Returns:
        A list of dicts with keys ``topic_id``, ``query``, ``question`` and
        ``narrative``, in document order.

    Raises:
        KeyError: If a topic element has no ``number`` attribute.
        IndexError: If a selected topic has fewer than three child elements.
    """
    root = ET.parse(path).getroot()
    # Python's % always yields 0 or 1 for a modulus of 2, so a single
    # remainder comparison selects the requested parity.
    wanted_remainder = 0 if isEven else 1

    selected = []
    for topic in root:
        number = int(topic.attrib['number'])
        if number % 2 != wanted_remainder:
            continue
        selected.append({
            'topic_id': number,
            'query': topic[0].text,
            'question': topic[1].text,
            'narrative': topic[2].text,
        })
    return selected
def topic_relevancy_extractor(path="relevancy-judgements.txt"):
    """Group relevancy judgements by document id.

    Every non-blank line of the file is split on whitespace. Field 0 is
    parsed as the integer topic id, field 2 is used as the document key
    (``cord_uid``) and field 3 is parsed as the integer relevancy score;
    field 1 is ignored.

    Args:
        path: Location of the judgements file. Defaults to the file name the
            function has always read, relative to the working directory.

    Returns:
        A ``defaultdict(list)`` mapping each ``cord_uid`` to a list of
        ``{'topic_id': int, 'relevancy': int}`` dicts in file order. Only
        documents that appear in the file get an entry.

    Raises:
        IndexError: If a non-blank line has fewer than four fields.
        ValueError: If the topic id or relevancy field is not an integer.
    """
    topic_with_relevancy = defaultdict(list)
    with open(path, "r") as f:
        for line in f:
            items = line.split()
            # Blank lines (including the empty tail left by a trailing
            # newline) carry no judgement; skip them instead of crashing
            # on items[0].
            if not items:
                continue
            topic_with_relevancy[items[2]].append({
                'topic_id': int(items[0]),
                'relevancy': int(items[3]),
            })
    return topic_with_relevancy