Skip to content

Commit

Permalink
Add update_metatranscriptome_activity_set method
Browse files Browse the repository at this point in the history
  • Loading branch information
mbthornton-lbl committed Jan 18, 2024
1 parent 7e406ac commit 8938e7b
Showing 1 changed file with 46 additions and 1 deletion.
47 changes: 46 additions & 1 deletion nmdc_automation/re_iding/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from nmdc_automation.re_iding.db_utils import (OMICS_PROCESSING_SET,
READS_QC_SET,
METAGENOME_ASSEMBLY_SET,
METATRANSCRIPTOME_ACTIVITY_SET,
check_for_single_omics_processing_record,
get_data_object_record_by_id,
get_omics_processing_id)
Expand Down Expand Up @@ -325,15 +326,59 @@ def update_metatranscriptome_activity_set(self, db_record: Dict,
f"{db_record[OMICS_PROCESSING_SET][0]['id']}")
new_omics_processing = new_db.omics_processing_set[0]

for metatranscriptome_rec in db_record["metatranscriptome_activity_set"]:
for metatranscriptome_rec in db_record[METATRANSCRIPTOME_ACTIVITY_SET]:
activity_type = "nmdc:MetaT"
omics_processing_id = new_omics_processing.id
has_input = [self._get_input_do_id(new_db, "Filtered Sequencing Reads")]

updated_has_output = []

new_activity_id = self.api_client.minter(activity_type) + "." + self.workflow_iteration
logging.info(f"New activity id created for {omics_processing_id} activity type {activity_type}: {new_activity_id}")

new_metatranscriptome_base_dir = os.path.join(self.data_dir, omics_processing_id,
new_activity_id)
os.makedirs(new_metatranscriptome_base_dir, exist_ok=True)

for old_do_id in metatranscriptome_rec["has_output"]:
logger.info(f"old_do_id: {old_do_id}")
old_do_rec = get_data_object_record_by_id(db_record, old_do_id)
if not old_do_rec:
logger.warning(f"Data object record not found for {old_do_id}")
continue
data_object_type = find_data_object_type(old_do_rec)
if not data_object_type:
logger.warning(f"Data object type not found for {old_do_id}")
continue
new_file_path = get_new_paths(old_do_rec["url"],new_metatranscriptome_base_dir, new_activity_id)
updated_md5, updated_file_size = assembly_file_operations(
old_do_rec, data_object_type, new_file_path, new_activity_id,
self.data_dir)
logging.info(f"New file path computed for {data_object_type}: {new_file_path}")
new_do = self.make_new_data_object(
omics_processing_id, activity_type, new_activity_id, old_do_rec, data_object_type
)
# add new data object to new database and update has_output
new_db.data_object_set.append(new_do)
updated_has_output.append(new_do.id)

# Get new Metatranscriptome activity set
new_metatranscriptome = self._make_new_activity_set_object(
omics_processing_id, new_activity_id, metatranscriptome_rec, has_input,
updated_has_output
)
# update activity-specific properties
# get new_metatranscriptome properties with no set value
unset_properties = [
p for p in new_metatranscriptome.__dict__ if not new_metatranscriptome.__dict__[p]
]
# check for that value in old record
for p in unset_properties:
if p in metatranscriptome_rec:
setattr(new_metatranscriptome, p, metatranscriptome_rec[p])

new_db.metatranscriptome_activity_set.append(new_metatranscriptome)
return new_db


def _get_input_do_id(self, new_db, data_object_type: str):
Expand Down

0 comments on commit 8938e7b

Please sign in to comment.