Skip to content

Commit

Permalink
Subclass Sync - Direct in source, indirect in Mondo
Browse files Browse the repository at this point in the history
- Update: Formalized new output *confirmed-direct-source-indirect-mondo.robot.tsv
  • Loading branch information
joeflack4 committed Dec 3, 2024
1 parent 2055a00 commit 4d366d4
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
7 changes: 6 additions & 1 deletion src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ sync: sync-subclassof sync-synonyms

# Synchronization: SubclassOf
.PHONY: sync-subclassof
sync-subclassof: $(REPORTDIR)/sync-subClassOf.confirmed.tsv $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv $(TMPDIR)/sync-subClassOf.added.self-parentage.tsv
sync-subclassof: $(REPORTDIR)/sync-subClassOf.confirmed.tsv $(REPORTDIR)/sync-subClassOf.confirmed-direct-source-indirect-mondo.tsv $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv $(TMPDIR)/sync-subClassOf.added.self-parentage.tsv

# todo: drop this? This is really just an alias here for quality of life, but not used by anything.
.PHONY: sync-subclassof-%
Expand All @@ -570,11 +570,16 @@ $(REPORTDIR)/sync-subClassOf.direct-in-mondo-only.tsv: $(foreach n,$(ALL_COMPONE
# Aggregate report: concatenation of every per-component *.subclass.confirmed.robot.tsv in $(REPORTDIR).
# The awk filter keeps the first two lines of the combined input (presumably the column header and the ROBOT
# template subheader, taken from the first file) and, for every file, only the lines after its own first two.
# Note: the recipe reads its inputs through the shell glob, not through $^, so any matching file present in
# $(REPORTDIR) is included.
$(REPORTDIR)/sync-subClassOf.confirmed.tsv: $(addprefix $(REPORTDIR)/,$(addsuffix .subclass.confirmed.robot.tsv,$(ALL_COMPONENT_IDS)))
	awk '(NR == 1) || (NR == 2) || (FNR > 2)' $(REPORTDIR)/*.subclass.confirmed.robot.tsv > $@

# Aggregate report for subclass relations that are direct in the source but indirect in Mondo: concatenation of
# every per-component *.subclass.confirmed-direct-source-indirect-mondo.robot.tsv in $(REPORTDIR).
# The awk filter keeps the first two lines of the combined input (presumably the column header and the ROBOT
# template subheader, taken from the first file) and, for every file, only the lines after its own first two.
# Note: the recipe reads its inputs through the shell glob, not through $^, so any matching file present in
# $(REPORTDIR) is included.
$(REPORTDIR)/sync-subClassOf.confirmed-direct-source-indirect-mondo.tsv: $(addprefix $(REPORTDIR)/,$(addsuffix .subclass.confirmed-direct-source-indirect-mondo.robot.tsv,$(ALL_COMPONENT_IDS)))
	awk '(NR == 1) || (NR == 2) || (FNR > 2)' $(REPORTDIR)/*.subclass.confirmed-direct-source-indirect-mondo.robot.tsv > $@

# Per-component subclass sync. A single run of sync_subclassof.py writes every output passed via the --outpath-*
# options below, so all of them are listed as targets of one pattern rule (for pattern rules GNU Make treats
# multiple targets as being produced together by one execution of the recipe). $* is the component id.
# The confirmed-direct-source-indirect-mondo report must be listed here as well: the recipe writes it, and
# $(REPORTDIR)/sync-subClassOf.confirmed-direct-source-indirect-mondo.tsv names it as a prerequisite. Without it
# as a target, Make has no rule to build that prerequisite.
$(REPORTDIR)/%.subclass.confirmed.robot.tsv $(REPORTDIR)/%.subclass.confirmed-direct-source-indirect-mondo.robot.tsv $(REPORTDIR)/%.subclass.added.robot.tsv $(REPORTDIR)/%.subclass.added-obsolete.robot.tsv $(REPORTDIR)/%.subclass.direct-in-mondo-only.tsv $(TMPDIR)/%.subclass.self-parentage.tsv: $(TMPDIR)/mondo-ingest.db $(TMPDIR)/mondo.db $(TMPDIR)/mondo.sssom.tsv
	python3 $(SCRIPTSDIR)/sync_subclassof.py \
	--outpath-added $(REPORTDIR)/$*.subclass.added.robot.tsv \
	--outpath-added-obsolete $(REPORTDIR)/$*.subclass.added-obsolete.robot.tsv \
	--outpath-confirmed $(REPORTDIR)/$*.subclass.confirmed.robot.tsv \
	--outpath-confirmed-direct-source-indirect-mondo $(REPORTDIR)/$*.subclass.confirmed-direct-source-indirect-mondo.robot.tsv \
	--outpath-direct-in-mondo-only $(REPORTDIR)/$*.subclass.direct-in-mondo-only.tsv \
	--outpath-self-parentage $(TMPDIR)/$*.subclass.self-parentage.tsv \
	--mondo-db-path $(TMPDIR)/mondo.db \
Expand Down
15 changes: 11 additions & 4 deletions src/scripts/sync_subclassof.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def _convert_edge_namespace(

def sync_subclassof(
outpath_added: str = EX_DEFAULTS['outpath_added'], outpath_confirmed: str = EX_DEFAULTS['outpath_confirmed'],
outpath_confirmed_direct_source_indirect_mondo: str = EX_DEFAULTS['outpath_confirmed_direct_source_indirect_mondo'],
outpath_added_obsolete: str = EX_DEFAULTS['outpath_added_obsolete'],
mondo_db_path: str = EX_DEFAULTS['mondo_db_path'], mondo_ingest_db_path: str = EX_DEFAULTS['mondo_ingest_db_path'],
mondo_mappings_path: str = EX_DEFAULTS['mondo_mappings_path'],
Expand Down Expand Up @@ -402,8 +403,7 @@ def sync_subclassof(
_confirmed_df(in_both_direct, outpath_confirmed)

# Case 2: SCR is direct in source, but indirect Mondo
_confirmed_df(in_source_direct_mondo_indirect,
outpath_confirmed.replace('confirmed', 'confirmed-direct-source-indirect-mondo'))
_confirmed_df(in_source_direct_mondo_indirect, outpath_confirmed_direct_source_indirect_mondo)

# Case 3: SCR is direct in the source, but not at all in Mondo
subheader = deepcopy(ROBOT_SUBHEADER)
Expand Down Expand Up @@ -486,8 +486,13 @@ def cli(): # todo: #remove-temp-defaults
'into Mondo, except for that these terms are obsolete in Mondo.')
parser.add_argument(
'-c', '--outpath-confirmed', required=False, default=EX_DEFAULTS['outpath_confirmed'],
help='Path to output robot template containing subclass relations for given ontology that exist in Mondo and '
'are confirmed to also exist in the source.')
help='Path to output robot template containing direct subclass relations for given ontology that exist in '
'Mondo and are confirmed to also exist in the source.')
# The long flag is spelled `--outpath-...`, like the other output-path options, so that (a) the flag the Makefile
# passes (`--outpath-confirmed-direct-source-indirect-mondo`) is recognised by argparse, and (b) the parsed
# attribute name matches the `outpath_confirmed_direct_source_indirect_mondo` parameter of sync_subclassof().
# `dest` is set explicitly to make that link obvious. The shorter `--confirmed-direct-source-indirect-mondo`
# spelling is kept as an alias for backward compatibility.
parser.add_argument(
    '-C', '--outpath-confirmed-direct-source-indirect-mondo', '--confirmed-direct-source-indirect-mondo',
    dest='outpath_confirmed_direct_source_indirect_mondo', required=False,
    default=EX_DEFAULTS['outpath_confirmed_direct_source_indirect_mondo'],
    help='Path to output robot template containing subclass relations for given ontology that exist in Mondo as '
         'indirect relations and are confirmed to also exist in the source as direct relations.')
parser.add_argument(
'-M', '--outpath-direct-in-mondo-only', required=False,
default=EX_DEFAULTS['outpath_direct_in_mondo_only'],
Expand Down Expand Up @@ -526,6 +531,8 @@ def run_defaults(use_cache=True): # todo: #remove-temp-defaults
sync_subclassof(**{
'outpath_added': str(REPORTS_DIR / f'{name}.subclass.added.robot.tsv'),
'outpath_confirmed': str(REPORTS_DIR / f'{name}.subclass.confirmed.robot.tsv'),
'outpath_confirmed_direct_source_indirect_mondo': \
str(REPORTS_DIR / f'{name}.subclass.confirmed-direct-source-indirect-mondo.robot.tsv'),
'onto_config_path': str(METADATA_DIR / f'{name}.yml'),
'mondo_db_path': str(TMP_DIR / 'mondo.db'),
'mondo_ingest_db_path': str(TMP_DIR / 'mondo-ingest.db'),
Expand Down
2 changes: 2 additions & 0 deletions src/scripts/sync_subclassof_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
'outpath_added': str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.added.robot.tsv'),
'outpath_added_obsolete': str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.added-obsolete.robot.tsv'),
'outpath_confirmed': str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.confirmed.robot.tsv'),
'outpath_confirmed_direct_source_indirect_mondo': \
str(REPORTS_DIR / f'{EX_ONTO_NAME}.subclass.confirmed-direct-source-indirect-mondo.robot.tsv'),
'onto_config_path': str(METADATA_DIR / f'{EX_ONTO_NAME}.yml'),
'mondo_db_path': str(TMP_DIR / 'mondo.db'),
'mondo_ingest_db_path': str(TMP_DIR / 'mondo-ingest.db'),
Expand Down

0 comments on commit 4d366d4

Please sign in to comment.