From 67ad09b656ade4b02acef1fd1b0097e580030877 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Mon, 28 Oct 2024 09:38:44 +0100 Subject: [PATCH] Fix query-results output filename: use .csv extension instead of .json --- config/mesh-demo.json | 2 +- llm_semantic_annotator/abstract/abstract_manager.py | 2 +- llm_semantic_annotator/similarity/model_embedding_manager.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/config/mesh-demo.json b/config/mesh-demo.json index ad572a2..889c534 100644 --- a/config/mesh-demo.json +++ b/config/mesh-demo.json @@ -1,6 +1,6 @@ { "encodeur" : "sentence-transformers/all-MiniLM-L6-v2", - "threshold_similarity_tag_chunk" : 0.60, + "threshold_similarity_tag_chunk" : 0.70, "threshold_similarity_tag" : 0.80, "batch_size" : 32, diff --git a/llm_semantic_annotator/abstract/abstract_manager.py b/llm_semantic_annotator/abstract/abstract_manager.py index a7f2a17..35163ac 100644 --- a/llm_semantic_annotator/abstract/abstract_manager.py +++ b/llm_semantic_annotator/abstract/abstract_manager.py @@ -263,7 +263,7 @@ def build_dataset_abstracts_annotations(self): abstracts_origin_gen = filename.split('.json')[0] abstracts_gen = self.mem.get_filename_pth(abstracts_origin_gen).split('.pth')[0] abstracts_scores = abstracts_gen+"_scores.json" - abstracts_annotations_results_file = abstracts_gen+"_queryresults.json" + abstracts_annotations_results_file = abstracts_gen+"_queryresults.csv" print(abstracts_annotations_results_file) if os.path.exists(abstracts_annotations_results_file): print(f"{abstracts_annotations_results_file} already exists !") diff --git a/llm_semantic_annotator/similarity/model_embedding_manager.py b/llm_semantic_annotator/similarity/model_embedding_manager.py index 6887646..087a37d 100644 --- a/llm_semantic_annotator/similarity/model_embedding_manager.py +++ b/llm_semantic_annotator/similarity/model_embedding_manager.py @@ -45,8 +45,9 @@ class ModelEmbeddingManager(metaclass=Singleton): def __init__(self, config): self.config = config self.retention_dir = 
config['retention_dir'] - self.model_suffix="all-MiniLM-L6-v2" - self.model_name = config.get('encodeur', 'sentence-transformers/all-MiniLM-L6-v2') + self.encoder=config['encodeur'] + self.model_suffix=self.encoder.split('/')[-1] + self.model_name = config.get('encodeur', self.encoder) self.model = SentenceTransformer(self.model_name) self.model.similarity_fn_name = SimilarityFunction.MANHATTAN self.batch_size = config.get('batch_size', 32)