From 83a99e224140801f701b4b4927f1d290f5da3253 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Wed, 20 Nov 2024 12:42:23 -0800 Subject: [PATCH] Unit tests for load_workflow_process_nodes and cycle with Metagenome Sequencing --- nmdc_automation/workflow_automation/sched.py | 3 ++ tests/test_sched.py | 21 ++++++++ tests/test_workflow_process.py | 52 ++++++++++++++++---- 3 files changed, 67 insertions(+), 9 deletions(-) diff --git a/nmdc_automation/workflow_automation/sched.py b/nmdc_automation/workflow_automation/sched.py index 8ddf92d5..c43ab268 100644 --- a/nmdc_automation/workflow_automation/sched.py +++ b/nmdc_automation/workflow_automation/sched.py @@ -294,12 +294,15 @@ def cycle(self, dryrun: bool = False, skiplist: set = set(), self.get_existing_jobs.cache_clear() job_recs = [] for wfp_node in wfp_nodes: + # Skip if we are in the skiplist if wfp_node.was_informed_by in skiplist: logging.debug(f"Skipping: {wfp_node.was_informed_by}") continue + # Skip if the workflow is disabled if not wfp_node.workflow.enabled: logging.debug(f"Skipping: {wfp_node.id}, workflow disabled.") continue + # Find new jobs jobs = self.find_new_jobs(wfp_node) for job in jobs: if dryrun: diff --git a/tests/test_sched.py b/tests/test_sched.py index 6099999e..57b738cb 100644 --- a/tests/test_sched.py +++ b/tests/test_sched.py @@ -36,6 +36,27 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir, resp = jm.cycle() assert len(resp) == exp_num_jobs_cycle_1 + +def test_scheduler_cycle_metagenome_sequencing_workflow(test_db, mock_api, workflows_config_dir, site_config_file): + """ + Test basic job creation for metagenome sequencing workflow. Metagenome Sequencing is treated as a WorkflowExecution + instead of a DataGeneration, used for external sequencing data. + """ + reset_db(test_db) + load_fixture(test_db, "data_object_set.json") + load_fixture(test_db, "metagenome_sequencing.json", "workflow_execution_set") + + jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml", + site_conf=site_config_file) + resp = jm.cycle() + assert len(resp) == 1 + # Expect 1 unclaimed ReadQcAnalysis job in the response + job = resp[0] + assert job["claims"] == [] + assert job["config"]["activity"]["type"] == "nmdc:ReadQcAnalysis" + + + @mark.parametrize("workflow_file", [ "workflows.yaml", "workflows-mt.yaml" diff --git a/tests/test_workflow_process.py b/tests/test_workflow_process.py index 1cf81058..abd1924e 100644 --- a/tests/test_workflow_process.py +++ b/tests/test_workflow_process.py @@ -11,7 +11,7 @@ ) def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_dir): """ - Test + Test loading of WorkflowProcessNode objects starting with DataObjects, DataGenerations, and WorkflowExecutions """ metatranscriptome = False if workflow_file == "workflows-mt.yaml": @@ -24,27 +24,56 @@ def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_di wfs = load_workflow_configs(workflows_config_dir / workflow_file) - # these are called by load_activities + # these are called by load_workflow_process_nodes data_objs_by_id = get_required_data_objects_map(test_db, wfs) wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id) assert wf_execs assert len(wf_execs) == 2 - acts = load_workflow_process_nodes(test_db, wfs) + wp_nodes = load_workflow_process_nodes(test_db, wfs) # sanity check - assert acts - assert len(acts) == 2 + assert wp_nodes + assert len(wp_nodes) == 2 # Omics and RQC share data_object_type for metagenome and metatranscriptome # they can be distinguished by analyte category so we expect 1 of each # for metagenome and metatranscriptome - data_gen = [act for act in acts if act.type == "nmdc:NucleotideSequencing"][0] + data_gen = [wp_node for wp_node in wp_nodes if wp_node.type == "nmdc:NucleotideSequencing"][0] assert data_gen assert data_gen.children assert len(data_gen.children) == 1 assert data_gen.children[0].type == "nmdc:ReadQcAnalysis" +def test_load_workflow_process_nodes_metagenome_sequencing_workflow(test_db, workflows_config_dir): + """ + Test loading of WorkflowProcessNode objects starting with external sequencing data + which is treated as a WorkflowExecution instead of a DataGeneration + """ + reset_db(test_db) + load_fixture(test_db, "data_object_set.json") + load_fixture(test_db, "metagenome_sequencing.json", "workflow_execution_set") + + wfs = load_workflow_configs(workflows_config_dir / "workflows.yaml") + + # these are called by load_workflow_process_nodes + data_objs_by_id = get_required_data_objects_map(test_db, wfs) + # sanity check + assert data_objs_by_id + wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id) + assert wf_execs + assert len(wf_execs) == 1 + + wp_nodes = load_workflow_process_nodes(test_db, wfs) + # sanity check + assert wp_nodes + assert len(wp_nodes) == 1 + node = wp_nodes[0] + assert node.type == "nmdc:MetagenomeSequencing" + + + + @mark.parametrize( "workflow_file", ["workflows.yaml", "workflows-mt.yaml"] ) @@ -58,14 +87,12 @@ def test_load_workflows(workflows_config_dir, workflow_file): shared_wf_names = ["Sequencing Noninterleaved", "Sequencing Interleaved"] if metatranscriptome: - exp_num_wfs = 9 exp_wf_names = ["Metatranscriptome Reads QC", "Metatranscriptome Reads QC Interleave", "Metatranscriptome Assembly", "Metatranscriptome Annotation", "Expression Analysis Antisense", "Expression Analysis Sense", "Expression Analysis Nonstranded", ] else: - exp_num_wfs = 8 exp_wf_names = ["Reads QC", "Reads QC Interleave", "Metagenome Assembly", "Metagenome Annotation", "MAGs", - "Readbased Analysis", ] + "Readbased Analysis", "Metagenome Sequencing" ] wfs = load_workflow_configs(workflows_config_dir / workflow_file) assert wfs @@ -77,6 +104,13 @@ def test_load_workflows(workflows_config_dir, workflow_file): assert wf_name in wfm wf = wfm[wf_name] assert wf is not None + # Metagenome Sequencing is a special case + if wf_name == "Metagenome Sequencing": + assert wf.collection is not None + assert wf.enabled + assert wf.analyte_category == "Metagenome" + continue + # Normal workflow assert wf.type is not None assert wf.name is not None assert wf.collection is not None