-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathword_embedding.py
70 lines (53 loc) · 2.1 KB
/
word_embedding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from gensim.models import KeyedVectors
from gensim.models.wrappers import FastText
import numpy as np
class Word2Vec(object):
    """Word vectors loaded from a word2vec-format file through gensim.

    The loaded ``KeyedVectors`` object is kept in ``self.model``.
    """

    def __init__(self, filename):
        """Load the vectors stored at ``filename`` and print their size.

        Args:
            filename: path handed to ``KeyedVectors.load_word2vec_format``.
        """
        print("Loading Model")
        self.model = KeyedVectors.load_word2vec_format(filename)
        # Report the dimensionality from the model itself rather than by
        # looking up a probe word, which fails when that word is absent.
        print("Done! Model Loaded. Vector Size is ", self.model.vector_size)

    def get_embeddings(self, words):
        """Return the vectors of every word in ``words`` known to the model.

        Unknown words are dropped. If no word is known (or ``words`` is
        empty) a single-element list holding the vector stored under the
        key ``"unk"`` is returned instead.

        Args:
            words: iterable of word strings.

        Returns:
            A non-empty list of vectors.

        Raises:
            KeyError: if no word is known and the model has no ``"unk"`` entry.
        """
        # Test membership on the model directly instead of via its ``vocab``
        # attribute, which is not available in every gensim release.
        found = [self.model[word] for word in words if word in self.model]
        if not found:
            return [self.model["unk"]]
        return found
class GloveVectors(object):
    """Word vectors read from a plain-text, GloVe-style embedding file.

    Each non-blank line of the file is whitespace-separated: the first token
    is the word and the remaining tokens are the float components of its
    vector. The parsed ``{word: vector}`` dict is kept in ``self.model`` and
    the element-wise mean of all vectors in ``self.unk``, which stands in
    for words that are not in the file.
    """

    def __init__(self, filename):
        """Load the embeddings stored at ``filename``."""
        print("Loading Model")
        self.model = self.loadGloveModel(filename)

    def loadGloveModel(self, gloveFile):
        """Parse ``gloveFile`` into a dict and compute the fallback vector.

        Sets ``self.unk`` to the mean of all loaded vectors as a side effect.

        Args:
            gloveFile: path of the text file to read (decoded as UTF-8).

        Returns:
            A dict mapping each word to a ``np.float64`` array.

        Raises:
            ValueError: if the file contains no embedding lines.
        """
        model = {}
        # The context manager closes the file even if a line fails to parse.
        with open(gloveFile, "r", encoding="utf-8") as f:
            for line in f:
                splitLine = line.split()
                if not splitLine:
                    # Blank (e.g. trailing) lines carry no embedding.
                    continue
                word = splitLine[0]
                embedding = np.array(
                    [float(val) for val in splitLine[1:]], dtype=np.float64
                )
                model[word] = embedding
        if not model:
            raise ValueError("No embeddings found in " + str(gloveFile))
        self.unk = np.mean(list(model.values()), axis=0)
        # Take the size from the mean vector so that no particular word has
        # to be present in the file.
        print("Done! Model Loaded. Vector Size is " + str(len(self.unk)))
        return model

    def get_embeddings(self, words):
        """Return one vector per word, using ``self.unk`` for unknown words.

        Args:
            words: iterable of word strings.

        Returns:
            A non-empty list of vectors; ``[self.unk]`` when ``words`` is
            empty.
        """
        ret = [self.model.get(word, self.unk) for word in words]
        if not ret:
            ret = [self.unk]
        return ret
class FastTextVectors(object):
    """Word vectors loaded from a fastText model file through gensim.

    The loaded model object is kept in ``self.model``.
    """

    def __init__(self, filename):
        """Load the model at ``filename`` and print its vector size.

        A name ending in ``vec`` is read with
        ``KeyedVectors.load_word2vec_format``; anything else is read with
        ``FastText.load_fasttext_format``.
        """
        print("Loading Model")
        if filename.endswith("vec"):
            loaded = KeyedVectors.load_word2vec_format(filename)
        else:
            loaded = FastText.load_fasttext_format(filename)
        self.model = loaded
        # The size is measured on the vector of the probe word "hello".
        dimension = len(self.model["hello"])
        print("Done! Model Loaded. Vector Size is " + str(dimension))

    def get_embeddings(self, words):
        """Return the vectors of the words found in the model's vocabulary.

        Words outside ``self.model.vocab`` are dropped. When nothing is left
        (or ``words`` is empty) the result is a single-element list holding
        the vector stored under ``"unk"``.
        """
        vectors = []
        for word in words:
            if word in self.model.vocab:
                vectors.append(self.model[word])
        if vectors:
            return vectors
        return [self.model["unk"]]