From cbec96c61233e4792ada4a19c3430a6eea7eb921 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 19 Nov 2024 16:24:52 -0800 Subject: [PATCH 1/2] Update fixtures to replicate error for wf in workflows: required_types.update(set(wf.data_object_types)) required_data_objs_by_id = dict() for rec in db.data_object_set.find(): do = DataObject(**rec) > if do.data_object_type.code.text not in required_types: E AttributeError: 'NoneType' object has no attribute 'code' --- tests/fixtures/nmdc_db/data_object_set.json | 12 +++++++++++- tests/test_models.py | 9 +++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/fixtures/nmdc_db/data_object_set.json b/tests/fixtures/nmdc_db/data_object_set.json index 1d6e7a5d..0918ae38 100644 --- a/tests/fixtures/nmdc_db/data_object_set.json +++ b/tests/fixtures/nmdc_db/data_object_set.json @@ -815,5 +815,15 @@ "data_object_type": "Metatranscriptome Expression Info File", "url": "https://data.microbiomedata.org", "type": "nmdc:DataObject" - } + }, + { + "id" : "nmdc:dobj-13-zzzyae97", + "name" : "output: SBR_FC_N1_00-10_H2Oext_13Oct15_Leopard_1_01_4404", + "description" : "High resolution MS spectra only", + "file_size_bytes" : 30539306, + "type" : "nmdc:DataObject", + "alternative_identifiers" : [ + "emsl:output_456424" + ] +} ] \ No newline at end of file diff --git a/tests/test_models.py b/tests/test_models.py index 8fb00898..e219f88f 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -118,7 +118,9 @@ def test_data_object_creation_from_records(fixtures_dir): assert data_obj.type == "nmdc:DataObject" assert data_obj.id == record["id"] assert data_obj.name == record["name"] - assert str(data_obj.data_object_type) == record["data_object_type"] + # not all data objects have a data_object_type - e.g. Mass Spectrometry data + if "data_object_type" in record: + assert str(data_obj.data_object_type) == record["data_object_type"] data_obj_dict = yaml.safe_load(yaml_dumper.dumps(data_obj)) assert data_obj_dict == record @@ -136,9 +138,12 @@ def test_data_object_creation_from_db_records(test_db, fixtures_dir): assert data_obj.type == "nmdc:DataObject" assert data_obj.id == db_record["id"] assert data_obj.name == db_record["name"] + # not all data objects have a data_object_type or url - e.g. Mass Spectrometry data + if not db_record.get("data_object_type"): + continue assert str(data_obj.data_object_type) == db_record["data_object_type"] - assert data_obj.description == db_record["description"] assert data_obj.url == db_record["url"] + assert data_obj.description == db_record["description"] assert data_obj.file_size_bytes == db_record.get("file_size_bytes") assert data_obj.md5_checksum == db_record["md5_checksum"] From 7660a91946fae07d4053e0c4a3683a6bc92d1f03 Mon Sep 17 00:00:00 2001 From: Michael Thornton Date: Tue, 19 Nov 2024 16:30:38 -0800 Subject: [PATCH 2/2] filter out data objects with no data_object_type --- nmdc_automation/workflow_automation/workflow_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_automation/workflow_automation/workflow_process.py b/nmdc_automation/workflow_automation/workflow_process.py index 194ac1a6..9e676a83 100644 --- a/nmdc_automation/workflow_automation/workflow_process.py +++ b/nmdc_automation/workflow_automation/workflow_process.py @@ -26,7 +26,7 @@ def get_required_data_objects_map(db, workflows: List[WorkflowConfig]) -> Dict[s required_types.update(set(wf.data_object_types)) required_data_objs_by_id = dict() - for rec in db.data_object_set.find(): + for rec in db.data_object_set.find({"data_object_type": {"$ne": None}}): do = DataObject(**rec) if do.data_object_type.code.text not in required_types: continue