diff --git a/README.md b/README.md index 5195fa1..0f93f40 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ This approach aims to significantly enrich the metadata of scientific articles, ## Install ```bash -pip install git+https://github.com/p2m2/encoder-ontology-match-abstract.git@alpha-20241031 +pip install git+https://github.com/p2m2/encoder-ontology-match-abstract.git@20250120 curl -O https://raw.githubusercontent.com/p2m2/encoder-ontology-match-abstract/refs/heads/main/llm_semantic_annotator.sh ``` diff --git a/exec.sh b/exec.sh index 652b477..1aa47de 100755 --- a/exec.sh +++ b/exec.sh @@ -5,21 +5,21 @@ help() { Usage: $0 Commands: - 1. Pseudo workflow [2,4,5,6,7] + 1. Pseudo workflow [2,3,4,5,6,7] 2. Populate OWL tag embeddings 3. Populate abstract embeddings 4. Compute similarities between tags and abstract chunks - 5. Display similarities information - 6. Build turtle knowledge graph - 7. Build dataset abstracts annotations CSV file + 5. Build turtle knowledge graph + 6. Build dataset abstracts annotations CSV file + 7. 
Display similarities information Details: 2: Compute TAG embeddings for all ontologies defined in the populate_owl_tag_embeddings section 3: Compute ABSTRACT embeddings (title + sentences) for all abstracts in the dataset 4: Compute similarities between TAGS and ABSTRACTS - 5: Display similarities information on the console - 6: Generate turtle file with information {score, tag} for each DOI - 7: Generate CSV file with [doi, tag, pmid, reference_id] + 5: Generate turtle file with information {score, tag} for each DOI + 6: Generate CSV file with [doi, tag, pmid, reference_id] + 7: Display similarities information on the console EOF } @@ -81,9 +81,9 @@ execute_command() { 2) run_command python3 -m llm_semantic_annotator "$config_file" populate_owl_tag_embeddings ;; 3) run_command python3 -m llm_semantic_annotator "$config_file" populate_abstract_embeddings ;; 4) run_command python3 -m llm_semantic_annotator "$config_file" compute_tag_chunk_similarities ;; - 5) run_command python3 -m llm_semantic_annotator "$config_file" display_summary ;; - 6) run_command python3 -m llm_semantic_annotator "$config_file" build_rdf_graph ;; - 7) run_command python3 -m llm_semantic_annotator "$config_file" build_dataset_abstracts_annotations ;; + 5) run_command python3 -m llm_semantic_annotator "$config_file" build_rdf_graph ;; + 6) run_command python3 -m llm_semantic_annotator "$config_file" build_dataset_abstracts_annotations ;; + 7) run_command python3 -m llm_semantic_annotator "$config_file" display_summary ;; *) echo "Invalid option" ;; esac } @@ -97,6 +97,7 @@ case $command in run_command python3 -m llm_semantic_annotator "$config_file" populate_abstract_embeddings run_command python3 -m llm_semantic_annotator "$config_file" compute_tag_chunk_similarities run_command python3 -m llm_semantic_annotator "$config_file" build_rdf_graph + run_command python3 -m llm_semantic_annotator "$config_file" build_dataset_abstracts_annotations run_command python3 -m llm_semantic_annotator 
"$config_file" display_summary ;; 2|3|4|5|6|7) diff --git a/llm_semantic_annotator.sh b/llm_semantic_annotator.sh index 652b477..5905ede 100755 --- a/llm_semantic_annotator.sh +++ b/llm_semantic_annotator.sh @@ -5,21 +5,22 @@ help() { Usage: $0 Commands: - 1. Pseudo workflow [2,4,5,6,7] + 1. Pseudo workflow [2,3,4,5,6,7] 2. Populate OWL tag embeddings 3. Populate abstract embeddings 4. Compute similarities between tags and abstract chunks - 5. Display similarities information - 6. Build turtle knowledge graph - 7. Build dataset abstracts annotations CSV file + 5. Build turtle knowledge graph + 6. Build dataset abstracts annotations CSV file + 7. Display similarities information Details: 2: Compute TAG embeddings for all ontologies defined in the populate_owl_tag_embeddings section 3: Compute ABSTRACT embeddings (title + sentences) for all abstracts in the dataset 4: Compute similarities between TAGS and ABSTRACTS - 5: Display similarities information on the console - 6: Generate turtle file with information {score, tag} for each DOI - 7: Generate CSV file with [doi, tag, pmid, reference_id] + 5: Generate turtle file with information {score, tag} for each DOI + 6: Generate CSV file with [doi, tag, pmid, reference_id] + 7: Display similarities information on the console + EOF } @@ -81,9 +82,9 @@ execute_command() { 2) run_command python3 -m llm_semantic_annotator "$config_file" populate_owl_tag_embeddings ;; 3) run_command python3 -m llm_semantic_annotator "$config_file" populate_abstract_embeddings ;; 4) run_command python3 -m llm_semantic_annotator "$config_file" compute_tag_chunk_similarities ;; - 5) run_command python3 -m llm_semantic_annotator "$config_file" display_summary ;; - 6) run_command python3 -m llm_semantic_annotator "$config_file" build_rdf_graph ;; - 7) run_command python3 -m llm_semantic_annotator "$config_file" build_dataset_abstracts_annotations ;; + 5) run_command python3 -m llm_semantic_annotator "$config_file" build_rdf_graph ;; + 6) 
run_command python3 -m llm_semantic_annotator "$config_file" build_dataset_abstracts_annotations ;; + 7) run_command python3 -m llm_semantic_annotator "$config_file" display_summary ;; *) echo "Invalid option" ;; esac } @@ -97,7 +98,8 @@ case $command in run_command python3 -m llm_semantic_annotator "$config_file" populate_abstract_embeddings run_command python3 -m llm_semantic_annotator "$config_file" compute_tag_chunk_similarities run_command python3 -m llm_semantic_annotator "$config_file" build_rdf_graph - run_command python3 -m llm_semantic_annotator "$config_file" display_summary + run_command python3 -m llm_semantic_annotator "$config_file" build_dataset_abstracts_annotations + run_command python3 -m llm_semantic_annotator "$config_file" display_summary ;; 2|3|4|5|6|7) execute_command $command diff --git a/llm_semantic_annotator/abstract/abstract_manager.py b/llm_semantic_annotator/abstract/abstract_manager.py index 69fddc9..b133869 100644 --- a/llm_semantic_annotator/abstract/abstract_manager.py +++ b/llm_semantic_annotator/abstract/abstract_manager.py @@ -251,7 +251,9 @@ def build_dataset_abstracts_annotations(self): graphs = self.tags_manager.get_graphs_ontologies() ascendants_dict = defaultdict(list) debut = time.time() - self.build_ascendants_terms(ascendants_dict,graphs) + + # inference disabled! + #self.build_ascendants_terms(ascendants_dict,graphs) duree = time.time() - debut print(f"loading terms with ancestors : {duree:.4f} secondes")