diff --git a/config/sylvain.json b/config/sylvain.json new file mode 100644 index 0000000..bc0f1e6 --- /dev/null +++ b/config/sylvain.json @@ -0,0 +1,70 @@ +{ + "encodeur" : "sentence-transformers/all-MiniLM-L6-v2", + "threshold_similarity_tag_chunk" : 0.49, + "threshold_similarity_tag" : 0.80, + "batch_size" : 32, + + "populate_owl_tag_embeddings" : { + "ontologies": { + "planteome_link" : { + "peco": { + "url": "http://purl.obolibrary.org/obo/peco.owl", + "prefix": "http://purl.obolibrary.org/obo/PECO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "po": { + "url": "http://purl.obolibrary.org/obo/po.owl", + "prefix": "http://purl.obolibrary.org/obo/PO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "pso": { + "url": "http://purl.obolibrary.org/obo/pso.owl", + "prefix": "http://purl.obolibrary.org/obo/PSO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "to": { + "url": "http://purl.obolibrary.org/obo/to.owl", + "prefix": "http://purl.obolibrary.org/obo/TO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "pato": { + "url": "http://purl.obolibrary.org/obo/pato.owl", + "prefix": "http://purl.obolibrary.org/obo/PATO_", + "format": "xml", + "label" : "", + "properties": [""] + } + }, + "mesh_link" : { + "mesh": { + "filepath" : "data/mesh/2024.ttl", + "prefix": "http://id.nlm.nih.gov/mesh/", + "format": "turtle", + "label" : "", + "properties": [""] + } + } + }, + "debug_nb_terms_by_ontology" : -1 + }, + "populate_ncbi_taxon_tag_embeddings" : { + "regex" : "(assic.*)|(ola)" , + "tags_per_file" : 2000 + }, + "populate_abstract_embeddings" : { + "abstracts_per_file" : 50, + "from_file" : { + "json_files" : [ + "data/abstracts/sylvain.json" + ] + } + } +} diff --git a/config/test_lotus.json b/config/test_lotus.json new file mode 100644 index 0000000..060b0e8 --- /dev/null +++ b/config/test_lotus.json @@ -0,0 +1,83 @@ +{ + "encodeur" : "sentence-transformers/all-MiniLM-L6-v2", + 
"threshold_similarity_tag_chunk" : 0.49, + "threshold_similarity_tag" : 0.80, + "batch_size" : 32, + + "populate_owl_tag_embeddings" : { + "ontologies": { + "planteome_link" : { + "peco": { + "url": "http://purl.obolibrary.org/obo/peco.owl", + "prefix": "http://purl.obolibrary.org/obo/PECO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "po": { + "url": "http://purl.obolibrary.org/obo/po.owl", + "prefix": "http://purl.obolibrary.org/obo/PO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "pso": { + "url": "http://purl.obolibrary.org/obo/pso.owl", + "prefix": "http://purl.obolibrary.org/obo/PSO_", + "format": "xml", + "label" : "", + "properties": [""] + }, + "to": { + "url": "http://purl.obolibrary.org/obo/to.owl", + "prefix": "http://purl.obolibrary.org/obo/TO_", + "format": "xml", + "label" : "", + "properties": [""] + } + }, + "technology_link" : { + "ms": { + "url": "http://purl.obolibrary.org/obo/ms.owl", + "prefix": "http://purl.obolibrary.org/obo/MS_", + "format": "xml", + "label" : "", + "properties": [""] + } + }, + "mesh_link" : { + "mesh": { + "filepath" : "data/mesh/2024.ttl", + "prefix": "http://id.nlm.nih.gov/mesh/", + "format": "turtle", + "label" : "", + "properties": [""] + } + }, + "chemical_link" : { + "chmo" : { + "url": "http://purl.obolibrary.org/obo/chmo.owl", + "prefix": "http://purl.obolibrary.org/obo/CHMO_", + "format": "xml", + "label" : "", + "properties": [""] + + } + } + }, + "debug_nb_terms_by_ontology" : -1 + }, + "populate_ncbi_taxon_tag_embeddings" : { + "regex" : "(assic.*)|(ola)" , + "tags_per_file" : 2000 + }, + "populate_abstract_embeddings" : { + "abstracts_per_file" : 50, + "from_file" : { + "json_files" : [ + "data/abstracts/abstracts_Identification_and_Quantification_of_Glucosinolates.json" + ] + } + + } +} diff --git a/data/abstracts/sylvain.json b/data/abstracts/sylvain.json new file mode 100644 index 0000000..786b09c --- /dev/null +++ b/data/abstracts/sylvain.json @@ -0,0 +1,7 @@ +[ + { 
+ "title": "--", + "abstract": "Water loss data during leaf dehydration to better understand species survival during drought events. 7 replicates per species.", + "doi": "--" + } +] \ No newline at end of file