-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathdict_lookup.py
69 lines (56 loc) · 1.74 KB
/
dict_lookup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import sys
def mkDict(lines, src, tgt):
    """Build a translation lookup table from ``;``-separated lines.

    Each line is split on ``;`` into columns, and a column is split on ``,``
    into alternatives. Every alternative found in column ``src`` is mapped to
    the list of all alternatives in column ``tgt``. Surrounding whitespace
    (including the trailing newline) is stripped from words and translations.
    When a word occurs on several lines its translations accumulate in input
    order; duplicates are kept.

    Lines with fewer than two columns, or without the requested ``src`` /
    ``tgt`` column, are skipped.

    Args:
        lines: Iterable of text lines.
        src: Index of the column holding the words to look up.
        tgt: Index of the column holding the translations.

    Returns:
        A dict mapping each source word to its list of translations.
    """
    table = {}
    for line in lines:
        fields = line.split(';')
        if len(fields) < 2:
            # Not a dictionary row (blank line, heading, ...): ignore it.
            continue
        try:
            sources, targets = fields[src], fields[tgt]
        except IndexError:
            # The row is shorter than the requested columns. Skip it like any
            # other malformed row instead of aborting the whole load.
            continue
        translations = [trans.strip() for trans in targets.split(',')]
        for word in sources.split(','):
            table.setdefault(word.strip(), []).extend(translations)
    return table
def readDict(filename, src, tgt):
    """Read a ``;``-separated dictionary file and return its lookup table.

    The file is opened in text mode with the platform's default encoding,
    read completely, and its lines are handed to ``mkDict`` together with
    the source and target column indices.

    Args:
        filename: Path of the dictionary file.
        src: Index of the column holding the words to look up.
        tgt: Index of the column holding the translations.

    Returns:
        Whatever ``mkDict`` builds from the file's lines.
    """
    with open(filename) as handle:
        rows = list(handle)
    return mkDict(rows, src, tgt)
def main():
    """Run an interactive dictionary lookup driven by the command line.

    Usage: ``dict_lookup <dictfile.csv> <from_column,to_column>?``

    The first argument names the dictionary file. The optional second
    argument is a comma-separated pair of integer column indices (source,
    target), which defaults to ``0,1``. With no arguments, or with a column
    spec that is not exactly two integers, a usage message is printed and
    the function returns.

    After loading, the user is prompted repeatedly for a search word and
    every translation found is printed on its own line ("hittar inte",
    i.e. "not found", when the word is unknown). An empty line or end of
    input ends the session.
    """
    args = sys.argv[1:]
    src, tgt = 0, 1
    valid = bool(args)
    if len(args) >= 2:
        try:
            # Raises ValueError both for non-integer parts and for a spec
            # that does not consist of exactly two parts.
            src, tgt = (int(col) for col in args[1].split(','))
        except ValueError:
            valid = False
    if not valid:
        print("dictionary lookup from ;-separated files")
        print("usage: dict_lookup <dictfile.csv> <from_column,to_column>?")
        return

    lexicon = readDict(args[0], src, tgt)

    prompt = "ange sökord+enter, sluta med enter> "
    while True:
        try:
            query = input(prompt)
        except EOFError:
            # Piped input ran out or the user pressed Ctrl-D: treat it the
            # same as an empty line instead of dying with a traceback.
            break
        if not query:
            break
        for trans in lexicon.get(query, ["hittar inte"]):
            print(trans)
# Start the interactive lookup only when this file is executed as a script;
# importing it as a module just makes the functions available.
if __name__ == "__main__":
    main()
##############
# Build a lemmatization lexicon from lines of the form: lemma,form,...,form
def mkLemmaDict(lines):
    """Build a form-to-lemmas lexicon from comma-separated lines.

    Each line is split on ``,``. The first item is the lemma and every
    following item is a word form of that lemma; surrounding whitespace
    (including the trailing newline) is stripped from all items. Each form
    is mapped to the list of lemmas it was listed under, in input order and
    without duplicates. The lemma itself is only included as a key if it
    also appears among the forms.

    Empty items (from blank lines, trailing or doubled commas) are ignored:
    a line with an empty lemma is skipped, and empty forms are not entered
    into the lexicon.

    Args:
        lines: Iterable of text lines.

    Returns:
        A dict mapping each word form to a list of its lemmas.
    """
    lexicon = {}
    for line in lines:
        lemma, *forms = (word.strip() for word in line.split(','))
        if not lemma:
            # Blank line or a row starting with a comma: nothing to map to.
            continue
        for form in forms:
            if not form:
                # Artifact of a trailing or doubled comma, not a real form.
                continue
            lemmas = lexicon.setdefault(form, [])
            if lemma not in lemmas:
                lemmas.append(lemma)
    return lexicon