-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_CustomModelClient.py
59 lines (49 loc) · 2.17 KB
/
test_CustomModelClient.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from models.embedding_model import CustomEmbeddingModelClient
from lightrag.core import Embedder
from sentence_transformers.util import pytorch_cos_sim
def parse_embedding(embedding):
    """Return the first embedding vector from an embedder response.

    Args:
        embedding: Object returned by calling the embedder. It must expose a
            ``data`` sequence whose items have an ``embedding`` attribute.

    Returns:
        The ``embedding`` attribute of the first item in ``embedding.data``.

    Raises:
        ValueError: If ``embedding.data`` is ``None`` or empty.
    """
    items = embedding.data
    if items is None or len(items) == 0:
        # Report the response's own error field (when it has one) rather than
        # letting an opaque "list index out of range" hide the real cause.
        error = getattr(embedding, "error", None)
        raise ValueError(f"embedder returned no embeddings (error: {error!r})")
    return items[0].embedding
def get_scalar(similarity_score) -> float:
    """Extract the single value of a 1x1 similarity matrix as a Python float.

    Args:
        similarity_score: Two-level indexable object (for example the tensor
            returned by ``pytorch_cos_sim`` for one pair of vectors) whose
            ``[0][0]`` element is the score.

    Returns:
        The ``[0][0]`` element converted to a built-in ``float``.
    """
    # float() goes through the element's own conversion, so this also works
    # for tensors that track gradients or live on another device, where
    # calling .numpy() directly would raise.
    return float(similarity_score[0][0])
def get_sentence_similarities(sentence_ref: str, sentence_to_compare_to: dict, embedder: Embedder) -> None:
    """Score every candidate sentence against the reference sentence, in place.

    Each value of ``sentence_to_compare_to`` is a dict with a ``"text"`` entry.
    The function embeds that text, compares it with the embedding of
    ``sentence_ref`` using ``pytorch_cos_sim``, and stores the result under
    ``"similarity_score"`` in the same dict.

    Args:
        sentence_ref: Reference sentence; embedded once before the loop.
        sentence_to_compare_to: Mapping of label -> entry dict. Every entry
            must contain ``"text"``; ``"similarity_score"`` is added or
            overwritten.
        embedder: Callable that takes a sentence and returns a response
            accepted by ``parse_embedding``.

    Returns:
        None. Results are written into ``sentence_to_compare_to``.
    """
    reference_vector = parse_embedding(embedder(sentence_ref))

    # Only the entry dicts are needed, so iterate the values directly instead
    # of walking the keys and indexing back into the mapping.
    for entry in sentence_to_compare_to.values():
        candidate_vector = parse_embedding(embedder(entry["text"]))
        similarity = pytorch_cos_sim(reference_vector, candidate_vector)
        entry["similarity_score"] = get_scalar(similarity)
def test_CustomModelClient(sentence_to_compare_to: dict) -> None:
    """Check each computed similarity score against its expected value.

    For every entry the ``"similarity_score"`` is rounded to three decimals
    and must equal the entry's ``"ground_truth"``. A header line is printed
    per entry, followed by ``test passed.`` when the entry matches.

    Args:
        sentence_to_compare_to: Mapping of label -> entry dict containing
            ``"similarity_score"`` and ``"ground_truth"``.

    Raises:
        AssertionError: On the first entry whose rounded score differs from
            its ground truth; the message names the entry and both values.
    """
    for name, entry in sentence_to_compare_to.items():
        print("-"*24, name, "-"*24)
        expected = entry["ground_truth"]
        rounded_score = round(entry["similarity_score"], 3)
        assert rounded_score == expected, (
            f"{name}: similarity {rounded_score} != expected {expected}"
        )
        print("test passed.")


# The function needs its data passed in explicitly, so pytest must not collect
# it by name: it would try to resolve the parameter as a fixture and error.
test_CustomModelClient.__test__ = False
def main() -> None:
    """Embed the sample sentences with the local model and verify the scores.

    Builds an ``Embedder`` backed by ``CustomEmbeddingModelClient``, computes
    the similarity of each sample sentence to the reference sentence, and
    asserts the rounded scores equal the expected values below.
    """
    model_path = "sentence-transformers/all-MiniLM-L6-v2"

    # test data. reference: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
    sentence_ref = "That is a happy person"
    sentence_to_compare_to = {
        "sentence_1": {
            "ground_truth": 0.695,
            "text": "That is a happy dog",
        },
        "sentence_2": {
            "ground_truth": 0.943,
            "text": "That is a very happy person",
        },
        "sentence_3": {
            "ground_truth": 0.257,
            "text": "Today is a sunny day",
        },
    }

    # load embedding model
    model_kwargs = {"model": model_path}
    local_embedder = Embedder(
        model_client=CustomEmbeddingModelClient(),
        model_kwargs=model_kwargs,
    )

    # run test: the first call fills in "similarity_score" for every entry,
    # the second compares those scores with the expected values.
    get_sentence_similarities(sentence_ref, sentence_to_compare_to, local_embedder)
    test_CustomModelClient(sentence_to_compare_to)


if __name__ == "__main__":
    main()