Skip to content

Commit

Permalink
Unit tests for load_workflow_process_nodes and cycle with Metagenome …
Browse files Browse the repository at this point in the history
…Sequencing
  • Loading branch information
mbthornton-lbl committed Nov 20, 2024
1 parent 0e81082 commit 83a99e2
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 9 deletions.
3 changes: 3 additions & 0 deletions nmdc_automation/workflow_automation/sched.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,12 +294,15 @@ def cycle(self, dryrun: bool = False, skiplist: set = set(),
self.get_existing_jobs.cache_clear()
job_recs = []
for wfp_node in wfp_nodes:
# Skip if we are in the skiplist
if wfp_node.was_informed_by in skiplist:
logging.debug(f"Skipping: {wfp_node.was_informed_by}")
continue
# Skip if the workflow is disabled
if not wfp_node.workflow.enabled:
logging.debug(f"Skipping: {wfp_node.id}, workflow disabled.")
continue
# Find new jobs
jobs = self.find_new_jobs(wfp_node)
for job in jobs:
if dryrun:
Expand Down
21 changes: 21 additions & 0 deletions tests/test_sched.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@ def test_scheduler_cycle(test_db, mock_api, workflow_file, workflows_config_dir,
resp = jm.cycle()
assert len(resp) == exp_num_jobs_cycle_1


def test_scheduler_cycle_metagenome_sequencing_workflow(test_db, mock_api, workflows_config_dir, site_config_file):
"""
Test basic job creation for metagenome sequencing workflow. Metagenome Sequencing is treated as a WorkflowExecution
instead of a DataGeneration, used for external sequencing data.
"""
reset_db(test_db)
load_fixture(test_db, "data_object_set.json")
load_fixture(test_db, "metagenome_sequencing.json", "workflow_execution_set")

jm = Scheduler(test_db, wfn=workflows_config_dir / "workflows.yaml",
site_conf=site_config_file)
resp = jm.cycle()
assert len(resp) == 1
# Expect 1 unclaimed ReadQcAnalysis job in the response
job = resp[0]
assert job["claims"] == []
assert job["config"]["activity"]["type"] == "nmdc:ReadQcAnalysis"



@mark.parametrize("workflow_file", [
"workflows.yaml",
"workflows-mt.yaml"
Expand Down
52 changes: 43 additions & 9 deletions tests/test_workflow_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)
def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_dir):
"""
Test
Test loading of WorkflowProcessNode objects starting with DataObjects, DataGenerations, and WorkflowExecutions
"""
metatranscriptome = False
if workflow_file == "workflows-mt.yaml":
Expand All @@ -24,27 +24,56 @@ def test_load_workflow_process_nodes(test_db, workflow_file, workflows_config_di

wfs = load_workflow_configs(workflows_config_dir / workflow_file)

# these are called by load_activities
# these are called by load_workflow_process_nodes
data_objs_by_id = get_required_data_objects_map(test_db, wfs)
wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id)
assert wf_execs
assert len(wf_execs) == 2

acts = load_workflow_process_nodes(test_db, wfs)
wp_nodes = load_workflow_process_nodes(test_db, wfs)
# sanity check
assert acts
assert len(acts) == 2
assert wp_nodes
assert len(wp_nodes) == 2

# Omics and RQC share data_object_type for metagenome and metatranscriptome
# they can be distinguished by analyte category so we expect 1 of each
# for metagenome and metatranscriptome
data_gen = [act for act in acts if act.type == "nmdc:NucleotideSequencing"][0]
data_gen = [wp_node for wp_node in wp_nodes if wp_node.type == "nmdc:NucleotideSequencing"][0]
assert data_gen
assert data_gen.children
assert len(data_gen.children) == 1
assert data_gen.children[0].type == "nmdc:ReadQcAnalysis"


def test_load_workflow_process_nodes_metagenome_sequencing_workflow(test_db, workflows_config_dir):
"""
Test loading of WorkflowProcessNode objects starting with external sequencing data
which is treated as a WorkflowExecution instead of a DataGeneration
"""
reset_db(test_db)
load_fixture(test_db, "data_object_set.json")
load_fixture(test_db, "metagenome_sequencing.json", "workflow_execution_set")

wfs = load_workflow_configs(workflows_config_dir / "workflows.yaml")

# these are called by load_workflow_process_nodes
data_objs_by_id = get_required_data_objects_map(test_db, wfs)
# sanity check
assert data_objs_by_id
wf_execs = get_current_workflow_process_nodes(test_db, wfs, data_objs_by_id)
assert wf_execs
assert len(wf_execs) == 1

wp_nodes = load_workflow_process_nodes(test_db, wfs)
# sanity check
assert wp_nodes
assert len(wp_nodes) == 1
node = wp_nodes[0]
assert node.type == "nmdc:MetagenomeSequencing"




@mark.parametrize(
"workflow_file", ["workflows.yaml", "workflows-mt.yaml"]
)
Expand All @@ -58,14 +87,12 @@ def test_load_workflows(workflows_config_dir, workflow_file):

shared_wf_names = ["Sequencing Noninterleaved", "Sequencing Interleaved"]
if metatranscriptome:
exp_num_wfs = 9
exp_wf_names = ["Metatranscriptome Reads QC", "Metatranscriptome Reads QC Interleave",
"Metatranscriptome Assembly", "Metatranscriptome Annotation", "Expression Analysis Antisense",
"Expression Analysis Sense", "Expression Analysis Nonstranded", ]
else:
exp_num_wfs = 8
exp_wf_names = ["Reads QC", "Reads QC Interleave", "Metagenome Assembly", "Metagenome Annotation", "MAGs",
"Readbased Analysis", ]
"Readbased Analysis", "Metagenome Sequencing" ]

wfs = load_workflow_configs(workflows_config_dir / workflow_file)
assert wfs
Expand All @@ -77,6 +104,13 @@ def test_load_workflows(workflows_config_dir, workflow_file):
assert wf_name in wfm
wf = wfm[wf_name]
assert wf is not None
# Metagenome Sequencing is a special case
if wf_name == "Metagenome Sequencing":
assert wf.collection is not None
assert wf.enabled
assert wf.analyte_category == "Metagenome"
continue
# Normal workflow
assert wf.type is not None
assert wf.name is not None
assert wf.collection is not None
Expand Down

0 comments on commit 83a99e2

Please sign in to comment.