From 98d4322fd0e69cf6e9cbad92ee829f6f1ef07e26 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Fri, 15 Nov 2024 12:36:47 -0800 Subject: [PATCH] add normalization for contamination and completeness --- nmdc_automation/models/nmdc.py | 5 +++++ nmdc_automation/workflow_automation/watch_nmdc.py | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/nmdc_automation/models/nmdc.py b/nmdc_automation/models/nmdc.py index 0b964474..0b59044c 100644 --- a/nmdc_automation/models/nmdc.py +++ b/nmdc_automation/models/nmdc.py @@ -84,6 +84,11 @@ def _normalize_mags_record(record: Dict[str, Any]) -> Dict[str, Any]: # add type to eukaryotic_evaluation if it exists if "eukaryotic_evaluation" in mag: record["mags_list"][i]["eukaryotic_evaluation"]["type"] = "nmdc:EukEval" + # completeness and contamination need to be converted from string to float + if "completeness" in mag["eukaryotic_evaluation"]: + record["mags_list"][i]["eukaryotic_evaluation"]["completeness"] = float(mag["eukaryotic_evaluation"]["completeness"]) + if "contamination" in mag["eukaryotic_evaluation"]: + record["mags_list"][i]["eukaryotic_evaluation"]["contamination"] = float(mag["eukaryotic_evaluation"]["contamination"]) # gene count should be a positive integer - remove if 'null' if "gene_count" in mag and mag["gene_count"] == "null": mag.pop("gene_count") diff --git a/nmdc_automation/workflow_automation/watch_nmdc.py b/nmdc_automation/workflow_automation/watch_nmdc.py index e5382b23..9425a5fe 100644 --- a/nmdc_automation/workflow_automation/watch_nmdc.py +++ b/nmdc_automation/workflow_automation/watch_nmdc.py @@ -327,7 +327,6 @@ def cycle(self): ) if validation_report.results: logger.error(f"Validation error: {validation_report.results[0].message}") - logger.error(job_dict) continue # post workflow execution and data objects to the runtime api