Unit tests for load_workflow_process_nodes and cycle with Metagenome …

…Sequencing
microbiomedata · Nov 20, 2024 · 83a99e2 · 83a99e2
1 parent 0e81082
commit 83a99e2
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 9 deletions.
diff --git a/nmdc_automation/workflow_automation/sched.py b/nmdc_automation/workflow_automation/sched.py
@@ -294,12 +294,15 @@ def cycle(self, dryrun: bool = False, skiplist: set = set(),
         self.get_existing_jobs.cache_clear()
         job_recs = []
         for wfp_node in wfp_nodes:
+            # Skip if we are in the skiplist
             if wfp_node.was_informed_by in skiplist:
                 logging.debug(f"Skipping: {wfp_node.was_informed_by}")
                 continue
+            # Skip if the workflow is disabled
             if not wfp_node.workflow.enabled:
                 logging.debug(f"Skipping: {wfp_node.id}, workflow disabled.")
                 continue
+            # Find new jobs
             jobs = self.find_new_jobs(wfp_node)
             for job in jobs:
                 if dryrun:

diff --git a/tests/test_sched.py b/tests/test_sched.py
@@ -36,6 +36,27 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir,
     resp = jm.cycle()
     assert len(resp) == exp_num_jobs_cycle_1
 
+
+def test_scheduler_cycle_metagenome_sequencing_workflow(test_db, mock_api, workflows_config_dir, site_config_file):
+    """
+    Test basic job creation for metagenome sequencing workflow. Metagenome Sequencing is treated as a WorkflowExecution
+    instead of a DataGeneration, used for external sequencing data.
+    """
+    reset_db(test_db)
+    load_fixture(test_db, "data_object_set.json")
+    load_fixture(test_db, "metagenome_sequencing.json", "workflow_execution_set")
+
+    jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml",
+                   site_conf=site_config_file)
+    resp = jm.cycle()
+    assert len(resp) == 1
+    # Expect 1 unclaimed ReadQcAnalysis job in the response
+    job = resp[0]
+    assert job["claims"] == []
+    assert job["config"]["activity"]["type"] == "nmdc:ReadQcAnalysis"
+
+
+
 @mark.parametrize("workflow_file", [
     "workflows.yaml",
     "workflows-mt.yaml"

diff --git a/tests/test_workflow_process.py b/tests/test_workflow_process.py
@@ -11,7 +11,7 @@
 )
 def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_dir):
     """
-    Test
+    Test loading of WorkflowProcessNode objects starting with DataObjects, DataGenerations, and WorkflowExecutions
     """
     metatranscriptome = False
     if workflow_file == "workflows-mt.yaml":
@@ -24,27 +24,56 @@ def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_di
 
     wfs = load_workflow_configs(workflows_config_dir / workflow_file)
 
-    # these are called by load_activities
+    # these are called by load_workflow_process_nodes
     data_objs_by_id = get_required_data_objects_map(test_db, wfs)
     wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id)
     assert wf_execs
     assert len(wf_execs) == 2
 
-    acts = load_workflow_process_nodes(test_db, wfs)
+    wp_nodes = load_workflow_process_nodes(test_db, wfs)
     # sanity check
-    assert acts
-    assert len(acts) == 2
+    assert wp_nodes
+    assert len(wp_nodes) == 2
 
     # Omics and RQC share data_object_type for metagenome and metatranscriptome
     # they can be distinguished by analyte category so we expect 1 of each
     # for metagenome and metatranscriptome
-    data_gen = [act for act in acts if act.type == "nmdc:NucleotideSequencing"][0]
+    data_gen = [wp_node for wp_node in wp_nodes if wp_node.type == "nmdc:NucleotideSequencing"][0]
     assert data_gen
     assert data_gen.children
     assert len(data_gen.children) == 1
     assert data_gen.children[0].type == "nmdc:ReadQcAnalysis"
 
 
+def test_load_workflow_process_nodes_metagenome_sequencing_workflow(test_db, workflows_config_dir):
+    """
+    Test loading of WorkflowProcessNode objects starting with external sequencing data
+    which is treated as a WorkflowExecution instead of a DataGeneration
+    """
+    reset_db(test_db)
+    load_fixture(test_db, "data_object_set.json")
+    load_fixture(test_db, "metagenome_sequencing.json", "workflow_execution_set")
+
+    wfs = load_workflow_configs(workflows_config_dir / "workflows.yaml")
+
+    # these are called by load_workflow_process_nodes
+    data_objs_by_id = get_required_data_objects_map(test_db, wfs)
+    # sanity check
+    assert data_objs_by_id
+    wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id)
+    assert wf_execs
+    assert len(wf_execs) == 1
+
+    wp_nodes = load_workflow_process_nodes(test_db, wfs)
+    # sanity check
+    assert wp_nodes
+    assert len(wp_nodes) == 1
+    node = wp_nodes[0]
+    assert node.type == "nmdc:MetagenomeSequencing"
+
+
+
+
 @mark.parametrize(
     "workflow_file", ["workflows.yaml", "workflows-mt.yaml"]
 )
@@ -58,14 +87,12 @@ def test_load_workflows(workflows_config_dir, workflow_file):
 
     shared_wf_names = ["Sequencing Noninterleaved", "Sequencing Interleaved"]
     if metatranscriptome:
-        exp_num_wfs = 9
         exp_wf_names = ["Metatranscriptome Reads QC", "Metatranscriptome Reads QC Interleave",
                         "Metatranscriptome Assembly", "Metatranscriptome Annotation", "Expression Analysis Antisense",
                         "Expression Analysis Sense", "Expression Analysis Nonstranded", ]
     else:
-        exp_num_wfs = 8
         exp_wf_names = ["Reads QC", "Reads QC Interleave", "Metagenome Assembly", "Metagenome Annotation", "MAGs",
-                        "Readbased Analysis", ]
+                        "Readbased Analysis", "Metagenome Sequencing" ]
 
     wfs = load_workflow_configs(workflows_config_dir / workflow_file)
     assert wfs
@@ -77,6 +104,13 @@ def test_load_workflows(workflows_config_dir, workflow_file):
         assert wf_name in wfm
         wf = wfm[wf_name]
         assert wf is not None
+        # Metagenome Sequencing is a special case
+        if wf_name == "Metagenome Sequencing":
+            assert wf.collection is not None
+            assert wf.enabled
+            assert wf.analyte_category == "Metagenome"
+            continue
+        # Normal workflow
         assert wf.type is not None
         assert wf.name is not None
         assert wf.collection is not None