From 4d366d438c960cc21bfbc72b53d0457a2410a9e1 Mon Sep 17 00:00:00 2001 From: Joe Flack Date: Mon, 2 Dec 2024 19:03:34 -0500 Subject: [PATCH] Subclass Sync - Direct in source, indirect in Mondo - Update: Formalized new output *confirmed-direct-source-indirect-mondo.robot.tsv --- src/ontology/mondo-ingest.Makefile | 7 ++++++- src/scripts/sync_subclassof.py | 15 +++++++++++---- src/scripts/sync_subclassof_config.py | 2 ++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/ontology/mondo-ingest.Makefile b/src/ontology/mondo-ingest.Makefile index 4186b392..35e34135 100644 --- a/src/ontology/mondo-ingest.Makefile +++ b/src/ontology/mondo-ingest.Makefile @@ -553,7 +553,7 @@ sync: sync-subclassof sync-synonyms # Synchronization: SubclassOf .PHONY: sync-subclassof -sync-subclassof: $(REPORTDIR)/sync-subClassOf.confirmed.tsv $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv $(TMPDIR)/sync-subClassOf.added.self-parentage.tsv +sync-subclassof: $(REPORTDIR)/sync-subClassOf.confirmed.tsv $(REPORTDIR)/sync-subClassOf.confirmed-direct-source-indirect-mondo.tsv $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv $(TMPDIR)/sync-subClassOf.added.self-parentage.tsv # todo: drop this? This is really just an alias here for quality of life, but not used by anything. 
.PHONY: sync-subclassof-% @@ -570,11 +570,16 @@ $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv: $(foreach n,$(ALL_COMPONE $(REPORTDIR)/sync-subClassOf.confirmed.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(REPORTDIR)/$(n).subclass.confirmed.robot.tsv) awk '(NR == 1) || (NR == 2) || (FNR > 2)' $(REPORTDIR)/*.subclass.confirmed.robot.tsv > $@ +# TODO: this aggregate goal is already part of sync-subclassof, but the per-component pattern rule below does not list %.subclass.confirmed-direct-source-indirect-mondo.robot.tsv among its targets; add it there +$(REPORTDIR)/sync-subClassOf.confirmed-direct-source-indirect-mondo.tsv: $(foreach n,$(ALL_COMPONENT_IDS), $(REPORTDIR)/$(n).subclass.confirmed-direct-source-indirect-mondo.robot.tsv) + awk '(NR == 1) || (NR == 2) || (FNR > 2)' $(REPORTDIR)/*.subclass.confirmed-direct-source-indirect-mondo.robot.tsv > $@ + $(REPORTDIR)/%.subclass.confirmed.robot.tsv $(REPORTDIR)/%.subclass.added.robot.tsv $(REPORTDIR)/%.subclass.added-obsolete.robot.tsv $(REPORTDIR)/%.subclass.direct-in-mondo-only.tsv $(TMPDIR)/%.subclass.self-parentage.tsv: $(TMPDIR)/mondo-ingest.db $(TMPDIR)/mondo.db $(TMPDIR)/mondo.sssom.tsv python3 $(SCRIPTSDIR)/sync_subclassof.py \ --outpath-added $(REPORTDIR)/$*.subclass.added.robot.tsv \ --outpath-added-obsolete $(REPORTDIR)/$*.subclass.added-obsolete.robot.tsv \ --outpath-confirmed $(REPORTDIR)/$*.subclass.confirmed.robot.tsv \ + --outpath-confirmed-direct-source-indirect-mondo $(REPORTDIR)/$*.subclass.confirmed-direct-source-indirect-mondo.robot.tsv \ --outpath-direct-in-mondo-only $(REPORTDIR)/$*.subclass.direct-in-mondo-only.tsv \ --outpath-self-parentage $(TMPDIR)/$*.subclass.self-parentage.tsv \ --mondo-db-path $(TMPDIR)/mondo.db \ diff --git a/src/scripts/sync_subclassof.py b/src/scripts/sync_subclassof.py index f68909f3..0312d249 100644 --- a/src/scripts/sync_subclassof.py +++ b/src/scripts/sync_subclassof.py @@ -225,6 +225,7 @@ def _convert_edge_namespace( def sync_subclassof( outpath_added: str = EX_DEFAULTS['outpath_added'], outpath_confirmed: str = EX_DEFAULTS['outpath_confirmed'], + outpath_confirmed_direct_source_indirect_mondo: str = 
EX_DEFAULTS['outpath_confirmed_direct_source_indirect_mondo'], outpath_added_obsolete: str = EX_DEFAULTS['outpath_added_obsolete'], mondo_db_path: str = EX_DEFAULTS['mondo_db_path'], mondo_ingest_db_path: str = EX_DEFAULTS['mondo_ingest_db_path'], mondo_mappings_path: str = EX_DEFAULTS['mondo_mappings_path'], @@ -402,8 +403,7 @@ def sync_subclassof( _confirmed_df(in_both_direct, outpath_confirmed) # Case 2: SCR is direct in source, but indirect Mondo - _confirmed_df(in_source_direct_mondo_indirect, - outpath_confirmed.replace('confirmed', 'confirmed-direct-source-indirect-mondo')) + _confirmed_df(in_source_direct_mondo_indirect, outpath_confirmed_direct_source_indirect_mondo) # Case 3: SCR is direct in the source, but not at all in Mondo subheader = deepcopy(ROBOT_SUBHEADER) @@ -486,8 +486,13 @@ def cli(): # todo: #remove-temp-defaults 'into Mondo, except for that these terms are obsolete in Mondo.') parser.add_argument( '-c', '--outpath-confirmed', required=False, default=EX_DEFAULTS['outpath_confirmed'], - help='Path to output robot template containing subclass relations for given ontology that exist in Mondo and ' - 'are confirmed to also exist in the source.') + help='Path to output robot template containing direct subclass relations for given ontology that exist in ' + 'Mondo and are confirmed to also exist in the source.') + parser.add_argument( + '-C', '--confirmed-direct-source-indirect-mondo', required=False, + default=EX_DEFAULTS['outpath_confirmed_direct_source_indirect_mondo'], + help='Path to output robot template containing subclass relations for given ontology that exist in Mondo as ' + 'indirect relations and are confirmed to also exist in the source as direct relations.') parser.add_argument( '-M', '--outpath-direct-in-mondo-only', required=False, default=EX_DEFAULTS['outpath_direct_in_mondo_only'], @@ -526,6 +531,8 @@ def run_defaults(use_cache=True): # todo: #remove-temp-defaults sync_subclassof(**{ 'outpath_added': str(REPORTS_DIR / 
f'{name}.subclass.added.robot.tsv'), 'outpath_confirmed': str(REPORTS_DIR / f'{name}.subclass.confirmed.robot.tsv'), + 'outpath_confirmed_direct_source_indirect_mondo': \ + str(REPORTS_DIR / f'{name}.subclass.confirmed-direct-source-indirect-mondo.robot.tsv'), 'onto_config_path': str(METADATA_DIR / f'{name}.yml'), 'mondo_db_path': str(TMP_DIR / 'mondo.db'), 'mondo_ingest_db_path': str(TMP_DIR / 'mondo-ingest.db'), diff --git a/src/scripts/sync_subclassof_config.py b/src/scripts/sync_subclassof_config.py index 2b9d00ce..030f208e 100644 --- a/src/scripts/sync_subclassof_config.py +++ b/src/scripts/sync_subclassof_config.py @@ -30,6 +30,8 @@ 'outpath_added': str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.added.robot.tsv'), 'outpath_added_obsolete': str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.added-obsolete.robot.tsv'), 'outpath_confirmed': str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.confirmed.robot.tsv'), + 'outpath_confirmed_direct_source_indirect_mondo': \ + str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.confirmed-direct-source-indirect-mondo.robot.tsv'), 'onto_config_path': str(METADATA_DIR / f'{EX_ONTO_NAME}.yml'), 'mondo_db_path': str(TMP_DIR / 'mondo.db'), 'mondo_ingest_db_path': str(TMP_DIR / 'mondo-ingest.db'),